lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallPtrSet.h"
  28 #include "llvm/ADT/SmallSet.h"
  29 #include "llvm/ADT/SmallVector.h"
  30 #include "llvm/ADT/Statistic.h"
  31 #include "llvm/Analysis/AliasAnalysis.h"
  32 #include "llvm/Analysis/MemoryLocation.h"
  33 #include "llvm/CodeGen/DAGCombine.h"
  34 #include "llvm/CodeGen/ISDOpcodes.h"
  35 #include "llvm/CodeGen/MachineFrameInfo.h"
  36 #include "llvm/CodeGen/MachineFunction.h"
  37 #include "llvm/CodeGen/MachineMemOperand.h"
  38 #include "llvm/CodeGen/RuntimeLibcalls.h"
  39 #include "llvm/CodeGen/SelectionDAG.h"
  40 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  41 #include "llvm/CodeGen/SelectionDAGNodes.h"
  42 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  43 #include "llvm/CodeGen/TargetLowering.h"
  44 #include "llvm/CodeGen/TargetRegisterInfo.h"
  45 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  46 #include "llvm/CodeGen/ValueTypes.h"
  47 #include "llvm/IR/Attributes.h"
  48 #include "llvm/IR/Constant.h"
  49 #include "llvm/IR/DataLayout.h"
  50 #include "llvm/IR/DerivedTypes.h"
  51 #include "llvm/IR/Function.h"
  52 #include "llvm/IR/LLVMContext.h"
  53 #include "llvm/IR/Metadata.h"
  54 #include "llvm/Support/Casting.h"
  55 #include "llvm/Support/CodeGen.h"
  56 #include "llvm/Support/CommandLine.h"
  57 #include "llvm/Support/Compiler.h"
  58 #include "llvm/Support/Debug.h"
  59 #include "llvm/Support/ErrorHandling.h"
  60 #include "llvm/Support/KnownBits.h"
  61 #include "llvm/Support/MachineValueType.h"
  62 #include "llvm/Support/MathExtras.h"
  63 #include "llvm/Support/raw_ostream.h"
  64 #include "llvm/Target/TargetMachine.h"
  65 #include "llvm/Target/TargetOptions.h"
  66 #include <algorithm>
  67 #include <cassert>
  68 #include <cstdint>
  69 #include <functional>
  70 #include <iterator>
  71 #include <string>
  72 #include <tuple>
  73 #include <utility>
  74
  75 using namespace llvm;
  76
  77 #define DEBUG_TYPE "dagcombine"
  78
  79 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  80 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  81 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  82 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  83 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  84 STATISTIC(SlicedLoads, "Number of load sliced");
  85 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  86
  87 static cl::opt<bool>
  88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  89                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  90
  91 static cl::opt<bool>
  92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  93         cl::desc("Enable DAG combiner's use of TBAA"));
  94
  95 #ifndef NDEBUG
  96 static cl::opt<std::string>
  97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  98                    cl::desc("Only use DAG-combiner alias analysis in this"
  99                             " function"));
 100 #endif
 101
 102 /// Hidden option to stress test load slicing, i.e., when this option
 103 /// is enabled, load slicing bypasses most of its profitability guards.
 104 static cl::opt<bool>
 105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 106                   cl::desc("Bypass the profitability model of load slicing"),
 107                   cl::init(false));
 108
 109 static cl::opt<bool>
 110   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 111                     cl::desc("DAG combiner may split indexing from loads"));
 112
 113 static cl::opt<bool>
 114     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
 115                        cl::desc("DAG combiner enable merging multiple stores "
 116                                 "into a wider store"));
 117
 118 static cl::opt<unsigned> TokenFactorInlineLimit(
 119     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 120     cl::desc("Limit the number of operands to inline for Token Factors"));
 121
 122 static cl::opt<unsigned> StoreMergeDependenceLimit(
 123     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
 124     cl::desc("Limit the number of times for the same StoreNode and RootNode "
 125              "to bail out in store merging dependence check"));
 126
 127 namespace {
 128
 129   class DAGCombiner {
 130     SelectionDAG &DAG;
 131     const TargetLowering &TLI;
 132     CombineLevel Level;
 133     CodeGenOpt::Level OptLevel;
 134     bool LegalOperations = false;
 135     bool LegalTypes = false;
 136     bool ForCodeSize;
 137
 138     /// Worklist of all of the nodes that need to be simplified.
 139     ///
 140     /// This must behave as a stack -- new nodes to process are pushed onto the
 141     /// back and when processing we pop off of the back.
 142     ///
 143     /// The worklist will not contain duplicates but may contain null entries
 144     /// due to nodes being deleted from the underlying DAG.
 145     SmallVector<SDNode *, 64> Worklist;
 146
 147     /// Mapping from an SDNode to its position on the worklist.
 148     ///
 149     /// This is used to find and remove nodes from the worklist (by nulling
 150     /// them) when they are deleted from the underlying DAG. It relies on
 151     /// stable indices of nodes within the worklist.
 152     DenseMap<SDNode *, unsigned> WorklistMap;
 153     /// This records all nodes we attempted to add to the worklist since we
 154     /// considered a new worklist entry. As we do not add duplicate nodes
 155     /// to the worklist, this is different from the tail of the worklist.
 156     SmallSetVector<SDNode *, 32> PruningList;
 157
 158     /// Set of nodes which have been combined (at least once).
 159     ///
 160     /// This is used to allow us to reliably add any operands of a DAG node
 161     /// which have not yet been combined to the worklist.
 162     SmallPtrSet<SDNode *, 32> CombinedNodes;
 163
 164     /// Map from candidate StoreNode to the pair of RootNode and count.
 165     /// The count is used to track how many times we have seen the StoreNode
 166     /// with the same RootNode bail out in dependence check. If we have seen
 167     /// the bail out for the same pair many times over a limit, we won't
 168     /// consider the StoreNode with the same RootNode as store merging
 169     /// candidate again.
 170     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
 171
 172     // AA - Used for DAG load/store alias analysis.
 173     AliasAnalysis *AA;
 174
 175     /// Queue every user of \p N on the worklist; now that N has been
 176     /// simplified, the nodes that use it may be simplified further.
 177     void AddUsersToWorklist(SDNode *N) {
 178       for (SDNode *User : N->uses())
 179         AddToWorklist(User);
 180     }
 181
 182     // Drain the pruning list, deleting every recorded node that has no uses
 183     // left. This is called after any visit to a node, but should also be
 184     // called mid-visit after a failed combine that may have created a node.
 185     void clearAddedDanglingWorklistEntries() {
 186       // Pop candidates one at a time; only use-less nodes are deleted.
 187       while (!PruningList.empty()) {
 188         SDNode *Candidate = PruningList.pop_back_val();
 189         if (Candidate->use_empty())
 190           recursivelyDeleteUnusedNodes(Candidate);
 191       }
 192     }
 193
 194     SDNode *getNextWorklistEntry() {
 195       // Delete nodes that are no longer in use before picking the next entry.
 196       clearAddedDanglingWorklistEntries();
 197
 198       // Pop from the back, skipping the null slots that removeFromWorklist
 199       // leaves behind, until a live node turns up or the list runs dry.
 200       SDNode *Next = nullptr;
 201       while (!Worklist.empty() && !Next)
 202         Next = Worklist.pop_back_val();
 203       if (!Next)
 204         return nullptr;
 205
 206       bool GoodWorklistEntry = WorklistMap.erase(Next);
 207       (void)GoodWorklistEntry;
 208       assert(GoodWorklistEntry &&
 209              "Found a worklist entry without a corresponding map entry!");
 210       return Next;
 211     }
 212
 213     /// Call the node-specific routine that folds each particular type of node.
 214     SDValue visit(SDNode *N);
 215
 216   public:
 217     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 218         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 219           OptLevel(OL), AA(AA) {
 220       ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
 221       MaximumLegalStoreInBits = 0; // Widest legal simple type seen so far.
 222       for (MVT VT : MVT::all_valuetypes()) {
 223         const bool Usable = EVT(VT).isSimple() && VT != MVT::Other &&
 224                             TLI.isTypeLegal(EVT(VT));
 225         if (Usable && VT.getSizeInBits() >= MaximumLegalStoreInBits)
 226           MaximumLegalStoreInBits = VT.getSizeInBits();
 227       }
 228     }
 229
 230     void ConsiderForPruning(SDNode *N) {
 231       // Record N in PruningList as a candidate for potential pruning.
 232       PruningList.insert(N);
 233     }
 234
 235     /// Queue \p N at the back of the worklist (the next slot to be processed)
 236     /// unless it is already queued, in which case its position is unchanged.
 237     void AddToWorklist(SDNode *N) {
 238       assert(N->getOpcode() != ISD::DELETED_NODE &&
 239              "Deleted Node added to Worklist");
 240       // Handle nodes can't usefully be combined and would confuse the zero-use
 241       // deletion strategy, so they are never queued.
 242       if (N->getOpcode() == ISD::HANDLENODE)
 243         return;
 244
 245       ConsiderForPruning(N);
 246       const bool NewlyQueued =
 247           WorklistMap.insert(std::make_pair(N, Worklist.size())).second;
 248       if (NewlyQueued)
 249         Worklist.push_back(N);
 250     }
 251
 252     /// Drop \p N from all bookkeeping sets and blank out its worklist slot.
 253     void removeFromWorklist(SDNode *N) {
 254       CombinedNodes.erase(N);
 255       PruningList.remove(N);
 256       StoreRootCountMap.erase(N);
 257
 258       // Only nodes that are currently queued have a slot to clear.
 259       auto Slot = WorklistMap.find(N);
 260       if (Slot != WorklistMap.end()) {
 261         // Overwrite with null instead of erasing; erasing would be linear.
 262         Worklist[Slot->second] = nullptr;
 263         WorklistMap.erase(Slot);
 264       }
 265     }
 266
 267     void deleteAndRecombine(SDNode *N);
 268     bool recursivelyDeleteUnusedNodes(SDNode *N);
 269
 270     /// Replaces all uses of the results of one DAG node with new values.
 271     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 272                       bool AddTo = true);
 273
 274     /// Single-result overload: forwards \p Res as a one-element value array.
 275     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 276       return CombineTo(N, /*To=*/&Res, /*NumTo=*/1, AddTo);
 277     }
 278
 279     /// Two-result overload: forwards \p Res0 and \p Res1 as a two-element array.
 280     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 281                       bool AddTo = true) {
 282       const SDValue Replacements[2] = {Res0, Res1};
 283       return CombineTo(N, Replacements, /*NumTo=*/2, AddTo);
 284     }
 285
 286     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 287
 288   private:
 289     unsigned MaximumLegalStoreInBits;
 290
 291     /// Try to simplify the integer value \p Op, or the values it uses, by bit
 292     /// propagation, demanding every bit of its scalar width.
 293     /// \returns true if something could be simplified.
 294     bool SimplifyDemandedBits(SDValue Op) {
 295       const APInt AllBits =
 296           APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
 297       return SimplifyDemandedBits(Op, AllBits);
 298     }
 299
 300     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 301       const EVT OpVT = Op.getValueType();
 302       const unsigned Lanes = OpVT.isVector() ? OpVT.getVectorNumElements() : 1;
 303       const APInt AllLanes = APInt::getAllOnesValue(Lanes); // demand all lanes
 304       return SimplifyDemandedBits(Op, DemandedBits, AllLanes);
 305     }
 306
 307     /// Try to simplify the vector value \p Op, or the values it uses, based on
 308     /// which of its elements are demanded; this overload demands all of them.
 309     /// \returns true if something could be simplified.
 310     bool SimplifyDemandedVectorElts(SDValue Op) {
 311       const EVT VT = Op.getValueType();
 312       const APInt AllElts = APInt::getAllOnesValue(VT.getVectorNumElements());
 313       return SimplifyDemandedVectorElts(Op, AllElts);
 314     }
 315
 316     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 317                               const APInt &DemandedElts);
 318     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 319                                     bool AssumeSingleUse = false);
 320
 321     bool CombineToPreIndexedLoadStore(SDNode *N);
 322     bool CombineToPostIndexedLoadStore(SDNode *N);
 323     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 324     bool SliceUpLoad(SDNode *N);
 325
 326     // Scalars have size 0 to distinguish from singleton vectors.
 327     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 328     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 329     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 330
 331     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 332     ///   load.
 333     ///
 334     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 335     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 336     /// \param EltNo index of the vector element to load.
 337     /// \param OriginalLoad load that EVE came from to be replaced.
 338     /// \returns EVE on success SDValue() on failure.
 339     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 340                                          SDValue EltNo,
 341                                          LoadSDNode *OriginalLoad);
 342     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 343     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 344     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 345     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 346     SDValue PromoteIntBinOp(SDValue Op);
 347     SDValue PromoteIntShiftOp(SDValue Op);
 348     SDValue PromoteExtend(SDValue Op);
 349     bool PromoteLoad(SDValue Op);
 350
 351     /// Call the node-specific routine that knows how to fold each
 352     /// particular type of node. If that doesn't do anything, try the
 353     /// target-specific DAG combines.
 354     SDValue combine(SDNode *N);
 355
 356     // Visitation implementation - Implement dag node combining for different
 357     // node types.  The semantics are as follows:
 358     // Return Value:
 359     //   SDValue.getNode() == 0 - No change was made
 360     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 361     //   otherwise              - N should be replaced by the returned Operand.
 362     //
 363     SDValue visitTokenFactor(SDNode *N);
 364     SDValue visitMERGE_VALUES(SDNode *N);
 365     SDValue visitADD(SDNode *N);
 366     SDValue visitADDLike(SDNode *N);
 367     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 368     SDValue visitSUB(SDNode *N);
 369     SDValue visitADDSAT(SDNode *N);
 370     SDValue visitSUBSAT(SDNode *N);
 371     SDValue visitADDC(SDNode *N);
 372     SDValue visitADDO(SDNode *N);
 373     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 374     SDValue visitSUBC(SDNode *N);
 375     SDValue visitSUBO(SDNode *N);
 376     SDValue visitADDE(SDNode *N);
 377     SDValue visitADDCARRY(SDNode *N);
 378     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 379     SDValue visitSUBE(SDNode *N);
 380     SDValue visitSUBCARRY(SDNode *N);
 381     SDValue visitMUL(SDNode *N);
 382     SDValue visitMULFIX(SDNode *N);
 383     SDValue useDivRem(SDNode *N);
 384     SDValue visitSDIV(SDNode *N);
 385     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 386     SDValue visitUDIV(SDNode *N);
 387     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 388     SDValue visitREM(SDNode *N);
 389     SDValue visitMULHU(SDNode *N);
 390     SDValue visitMULHS(SDNode *N);
 391     SDValue visitSMUL_LOHI(SDNode *N);
 392     SDValue visitUMUL_LOHI(SDNode *N);
 393     SDValue visitMULO(SDNode *N);
 394     SDValue visitIMINMAX(SDNode *N);
 395     SDValue visitAND(SDNode *N);
 396     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 397     SDValue visitOR(SDNode *N);
 398     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 399     SDValue visitXOR(SDNode *N);
 400     SDValue SimplifyVBinOp(SDNode *N);
 401     SDValue visitSHL(SDNode *N);
 402     SDValue visitSRA(SDNode *N);
 403     SDValue visitSRL(SDNode *N);
 404     SDValue visitFunnelShift(SDNode *N);
 405     SDValue visitRotate(SDNode *N);
 406     SDValue visitABS(SDNode *N);
 407     SDValue visitBSWAP(SDNode *N);
 408     SDValue visitBITREVERSE(SDNode *N);
 409     SDValue visitCTLZ(SDNode *N);
 410     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 411     SDValue visitCTTZ(SDNode *N);
 412     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 413     SDValue visitCTPOP(SDNode *N);
 414     SDValue visitSELECT(SDNode *N);
 415     SDValue visitVSELECT(SDNode *N);
 416     SDValue visitSELECT_CC(SDNode *N);
 417     SDValue visitSETCC(SDNode *N);
 418     SDValue visitSETCCCARRY(SDNode *N);
 419     SDValue visitSIGN_EXTEND(SDNode *N);
 420     SDValue visitZERO_EXTEND(SDNode *N);
 421     SDValue visitANY_EXTEND(SDNode *N);
 422     SDValue visitAssertExt(SDNode *N);
 423     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 424     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 425     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 426     SDValue visitTRUNCATE(SDNode *N);
 427     SDValue visitBITCAST(SDNode *N);
 428     SDValue visitBUILD_PAIR(SDNode *N);
 429     SDValue visitFADD(SDNode *N);
 430     SDValue visitFSUB(SDNode *N);
 431     SDValue visitFMUL(SDNode *N);
 432     SDValue visitFMA(SDNode *N);
 433     SDValue visitFDIV(SDNode *N);
 434     SDValue visitFREM(SDNode *N);
 435     SDValue visitFSQRT(SDNode *N);
 436     SDValue visitFCOPYSIGN(SDNode *N);
 437     SDValue visitFPOW(SDNode *N);
 438     SDValue visitSINT_TO_FP(SDNode *N);
 439     SDValue visitUINT_TO_FP(SDNode *N);
 440     SDValue visitFP_TO_SINT(SDNode *N);
 441     SDValue visitFP_TO_UINT(SDNode *N);
 442     SDValue visitFP_ROUND(SDNode *N);
 443     SDValue visitFP_EXTEND(SDNode *N);
 444     SDValue visitFNEG(SDNode *N);
 445     SDValue visitFABS(SDNode *N);
 446     SDValue visitFCEIL(SDNode *N);
 447     SDValue visitFTRUNC(SDNode *N);
 448     SDValue visitFFLOOR(SDNode *N);
 449     SDValue visitFMINNUM(SDNode *N);
 450     SDValue visitFMAXNUM(SDNode *N);
 451     SDValue visitFMINIMUM(SDNode *N);
 452     SDValue visitFMAXIMUM(SDNode *N);
 453     SDValue visitBRCOND(SDNode *N);
 454     SDValue visitBR_CC(SDNode *N);
 455     SDValue visitLOAD(SDNode *N);
 456
 457     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 458     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 459
 460     SDValue visitSTORE(SDNode *N);
 461     SDValue visitLIFETIME_END(SDNode *N);
 462     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 463     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 464     SDValue visitBUILD_VECTOR(SDNode *N);
 465     SDValue visitCONCAT_VECTORS(SDNode *N);
 466     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 467     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 468     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 469     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 470     SDValue visitMLOAD(SDNode *N);
 471     SDValue visitMSTORE(SDNode *N);
 472     SDValue visitMGATHER(SDNode *N);
 473     SDValue visitMSCATTER(SDNode *N);
 474     SDValue visitFP_TO_FP16(SDNode *N);
 475     SDValue visitFP16_TO_FP(SDNode *N);
 476     SDValue visitVECREDUCE(SDNode *N);
 477
 478     SDValue visitFADDForFMACombine(SDNode *N);
 479     SDValue visitFSUBForFMACombine(SDNode *N);
 480     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 481
 482     SDValue XformToShuffleWithZero(SDNode *N);
 483     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 484                                                     const SDLoc &DL, SDValue N0,
 485                                                     SDValue N1);
 486     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 487                                       SDValue N1);
 488     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 489                            SDValue N1, SDNodeFlags Flags);
 490
 491     SDValue visitShiftByConstant(SDNode *N);
 492
 493     SDValue foldSelectOfConstants(SDNode *N);
 494     SDValue foldVSelectOfConstants(SDNode *N);
 495     SDValue foldBinOpIntoSelect(SDNode *BO);
 496     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 497     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 498     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 499     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 500                              SDValue N2, SDValue N3, ISD::CondCode CC,
 501                              bool NotExtCompare = false);
 502     SDValue convertSelectOfFPConstantsToLoadOffset(
 503         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 504         ISD::CondCode CC);
 505     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 506                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 507     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 508                               const SDLoc &DL);
 509     SDValue unfoldMaskedMerge(SDNode *N);
 510     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 511     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 512                           const SDLoc &DL, bool foldBooleans);
 513     SDValue rebuildSetCC(SDValue N);
 514
 515     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 516                            SDValue &CC) const;
 517     bool isOneUseSetCC(SDValue N) const;
 518     bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 519
 520     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 521                                          unsigned HiOp);
 522     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 523     SDValue CombineExtLoad(SDNode *N);
 524     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 525     SDValue combineRepeatedFPDivisors(SDNode *N);
 526     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 527     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 528     SDValue BuildSDIV(SDNode *N);
 529     SDValue BuildSDIVPow2(SDNode *N);
 530     SDValue BuildUDIV(SDNode *N);
 531     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 532     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
 533     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 534     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 535     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 536     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 537                                 SDNodeFlags Flags, bool Reciprocal);
 538     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 539                                 SDNodeFlags Flags, bool Reciprocal);
 540     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 541                                bool DemandHighBits = true);
 542     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 543     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 544                               SDValue InnerPos, SDValue InnerNeg,
 545                               unsigned PosOpcode, unsigned NegOpcode,
 546                               const SDLoc &DL);
 547     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 548     SDValue MatchLoadCombine(SDNode *N);
 549     SDValue MatchStoreCombine(StoreSDNode *N);
 550     SDValue ReduceLoadWidth(SDNode *N);
 551     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 552     SDValue splitMergedValStore(StoreSDNode *ST);
 553     SDValue TransformFPLoadStorePair(SDNode *N);
 554     SDValue convertBuildVecZextToZext(SDNode *N);
 555     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 556     SDValue reduceBuildVecToShuffle(SDNode *N);
 557     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 558                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 559                                   SDValue VecIn2, unsigned LeftIdx,
 560                                   bool DidSplitVec);
 561     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 562
 563     /// Walk up chain skipping non-aliasing memory nodes,
 564     /// looking for aliasing nodes and adding them to the Aliases vector.
 565     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 566                           SmallVectorImpl<SDValue> &Aliases);
 567
 568     /// Return true if there is any possibility that the two addresses overlap.
 569     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 570
 571     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 572     /// chain (aliasing node.)
 573     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 574
 575     /// Try to replace a store and any possibly adjacent stores on
 576     /// consecutive chains with better chains. Return true only if St is
 577     /// replaced.
 578     ///
 579     /// Notice that other chains may still be replaced even if the function
 580     /// returns false.
 581     bool findBetterNeighborChains(StoreSDNode *St);
 582
 583     // Helper for findBetterNeighborChains. Walk up store chain add additional
 584     // chained stores that do not overlap and can be parallelized.
 585     bool parallelizeChainedStores(StoreSDNode *St);
 586
 587     /// Pairs a pointer to an LSBaseSDNode with information on where that node
 588     /// is located in a sequence of memory operations connected by a chain.
 589     struct MemOpLink {
 590       // Pointer to the memory node.
 591       LSBaseSDNode *MemNode;
 592
 593       // Offset of this node from the base pointer.
 594       int64_t OffsetFromBase;
 595
 596       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 597           : MemNode(N), OffsetFromBase(Offset) {}
 598     };
 599
 600     /// This is a helper function for visitMUL to check the profitability
 601     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 602     /// MulNode is the original multiply, AddNode is (add x, c1),
 603     /// and ConstNode is c2.
 604     bool isMulAddWithConstProfitable(SDNode *MulNode,
 605                                      SDValue &AddNode,
 606                                      SDValue &ConstNode);
 607
 608     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 609     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 610     /// the type of the loaded value to be extended.
 611     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 612                           EVT LoadResultTy, EVT &ExtVT);
 613
 614     /// Helper function to calculate whether the given Load/Store can have its
 615     /// width reduced to ExtVT.
 616     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 617                            EVT &MemVT, unsigned ShAmt = 0);
 618
 619     /// Used by BackwardsPropagateMask to find suitable loads.
 620     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 621                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 622                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 623     /// Attempt to propagate a given AND node back to load leaves so that they
 624     /// can be combined into narrow loads.
 625     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 626
 627     /// Helper function for MergeConsecutiveStores which merges the
 628     /// component store chains.
 629     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 630                                 unsigned NumStores);
 631
 632     /// This is a helper function for MergeConsecutiveStores. When the
 633     /// source elements of the consecutive stores are all constants or
 634     /// all extracted vector elements, try to merge them into one
 635     /// larger store introducing bitcasts if necessary.  \return True
 636     /// if a merged store was created.
 637     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 638                                          EVT MemVT, unsigned NumStores,
 639                                          bool IsConstantSrc, bool UseVector,
 640                                          bool UseTrunc);
 641
 642     /// This is a helper function for MergeConsecutiveStores. Stores
 643     /// that potentially may be merged with St are placed in
 644     /// StoreNodes. \p Root is a chain predecessor to all store
 645     /// candidates.
 646     void getStoreMergeCandidates(StoreSDNode *St,
 647                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 648                                  SDNode *&Root);
 649
 650     /// Helper function for MergeConsecutiveStores. Checks if
 651     /// candidate stores have indirect dependency through their
 652     /// operands. RootNode is the predecessor to all stores calculated
 653     /// by getStoreMergeCandidates and is used to prune the dependency check.
 654     /// \return True if safe to merge.
 655     bool checkMergeStoreCandidatesForDependencies(
 656         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 657         SDNode *RootNode);
 658
 659     /// Merge consecutive store operations into a wide store.
 660     /// This optimization uses wide integers or vectors when possible.
 661     /// \return true if stores were merged into a wider store, false
 662     /// otherwise.
 663     bool MergeConsecutiveStores(StoreSDNode *St);
 664
 665     /// Try to transform a truncation where C is a constant:
 666     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 667     ///
 668     /// \p N needs to be a truncation and its first operand an AND. Other
 669     /// requirements are checked by the function (e.g. that trunc is
 670     /// single-use) and if missed an empty SDValue is returned.
 671     SDValue distributeTruncateThroughAnd(SDNode *N);
 672
 673     /// Helper function to determine whether the target supports operation
 674     /// given by \p Opcode for type \p VT, that is, whether the operation
    /// is legal or custom before legalizing operations, and whether it is
    /// legal (but not custom) after legalization.
 677     bool hasOperation(unsigned Opcode, EVT VT) {
 678       if (LegalOperations)
 679         return TLI.isOperationLegal(Opcode, VT);
 680       return TLI.isOperationLegalOrCustom(Opcode, VT);
 681     }
 682
 683   public:
 684     /// Runs the dag combiner on all nodes in the work list
 685     void Run(CombineLevel AtLevel);
 686
 687     SelectionDAG &getDAG() const { return DAG; }
 688
 689     /// Returns a type large enough to hold any valid shift amount - before type
 690     /// legalization these can be huge.
 691     EVT getShiftAmountTy(EVT LHSTy) {
 692       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 693       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 694     }
 695
 696     /// This method returns true if we are running before type legalization or
 697     /// if the specified VT is legal.
 698     bool isTypeLegal(const EVT &VT) {
 699       if (!LegalTypes) return true;
 700       return TLI.isTypeLegal(VT);
 701     }
 702
 703     /// Convenience wrapper around TargetLowering::getSetCCResultType
 704     EVT getSetCCResultType(EVT VT) const {
 705       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 706     }
 707
 708     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 709                          SDValue OrigLoad, SDValue ExtLoad,
 710                          ISD::NodeType ExtType);
 711   };
 712
 713 /// This class is a DAGUpdateListener that removes any deleted
 714 /// nodes from the worklist.
 715 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 716   DAGCombiner &DC;
 717
 718 public:
 719   explicit WorklistRemover(DAGCombiner &dc)
 720     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 721
 722   void NodeDeleted(SDNode *N, SDNode *E) override {
 723     DC.removeFromWorklist(N);
 724   }
 725 };
 726
 727 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 728   DAGCombiner &DC;
 729
 730 public:
 731   explicit WorklistInserter(DAGCombiner &dc)
 732       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 733
 734   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 735   //        compile time costs in large DAGs, e.g. Halide.
 736   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 737 };
 738
 739 } // end anonymous namespace
 740
 741 //===----------------------------------------------------------------------===//
 742 //  TargetLowering::DAGCombinerInfo implementation
 743 //===----------------------------------------------------------------------===//
 744
 745 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 746   ((DAGCombiner*)DC)->AddToWorklist(N);
 747 }
 748
 749 SDValue TargetLowering::DAGCombinerInfo::
 750 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 751   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 752 }
 753
 754 SDValue TargetLowering::DAGCombinerInfo::
 755 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 756   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 757 }
 758
 759 SDValue TargetLowering::DAGCombinerInfo::
 760 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 761   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 762 }
 763
 764 void TargetLowering::DAGCombinerInfo::
 765 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 766   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 767 }
 768
 769 //===----------------------------------------------------------------------===//
 770 // Helper Functions
 771 //===----------------------------------------------------------------------===//
 772
 773 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 774   removeFromWorklist(N);
 775
 776   // If the operands of this node are only used by the node, they will now be
 777   // dead. Make sure to re-visit them and recursively delete dead nodes.
 778   for (const SDValue &Op : N->ops())
 779     // For an operand generating multiple values, one of the values may
 780     // become dead allowing further simplification (e.g. split index
 781     // arithmetic from an indexed load).
 782     if (Op->hasOneUse() || Op->getNumValues() > 1)
 783       AddToWorklist(Op.getNode());
 784
 785   DAG.DeleteNode(N);
 786 }
 787
 788 // APInts must be the same size for most operations, this helper
 789 // function zero extends the shorter of the pair so that they match.
 790 // We provide an Offset so that we can create bitwidths that won't overflow.
 791 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 792   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 793   LHS = LHS.zextOrSelf(Bits);
 794   RHS = RHS.zextOrSelf(Bits);
 795 }
 796
 797 // Return true if this node is a setcc, or is a select_cc
 798 // that selects between the target values used for true and false, making it
 799 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 800 // the appropriate nodes based on the type of node we are checking. This
 801 // simplifies life a bit for the callers.
 802 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 803                                     SDValue &CC) const {
 804   if (N.getOpcode() == ISD::SETCC) {
 805     LHS = N.getOperand(0);
 806     RHS = N.getOperand(1);
 807     CC  = N.getOperand(2);
 808     return true;
 809   }
 810
 811   if (N.getOpcode() != ISD::SELECT_CC ||
 812       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 813       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 814     return false;
 815
 816   if (TLI.getBooleanContents(N.getValueType()) ==
 817       TargetLowering::UndefinedBooleanContent)
 818     return false;
 819
 820   LHS = N.getOperand(0);
 821   RHS = N.getOperand(1);
 822   CC  = N.getOperand(4);
 823   return true;
 824 }
 825
 826 /// Return true if this is a SetCC-equivalent operation with only one use.
 827 /// If this is true, it allows the users to invert the operation for free when
 828 /// it is profitable to do so.
 829 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 830   SDValue N0, N1, N2;
 831   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 832     return true;
 833   return false;
 834 }
 835
 836 // Returns the SDNode if it is a constant float BuildVector
 837 // or constant float.
 838 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 839   if (isa<ConstantFPSDNode>(N))
 840     return N.getNode();
 841   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 842     return N.getNode();
 843   return nullptr;
 844 }
 845
 846 // Determines if it is a constant integer or a build vector of constant
 847 // integers (and undefs).
 848 // Do not permit build vector implicit truncation.
 849 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 850   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 851     return !(Const->isOpaque() && NoOpaques);
 852   if (N.getOpcode() != ISD::BUILD_VECTOR)
 853     return false;
 854   unsigned BitWidth = N.getScalarValueSizeInBits();
 855   for (const SDValue &Op : N->op_values()) {
 856     if (Op.isUndef())
 857       continue;
 858     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 859     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 860         (Const->isOpaque() && NoOpaques))
 861       return false;
 862   }
 863   return true;
 864 }
 865
 866 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 867 // undef's.
 868 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 869   if (V.getOpcode() != ISD::BUILD_VECTOR)
 870     return false;
 871   return isConstantOrConstantVector(V, NoOpaques) ||
 872          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 873 }
 874
 875 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
 876                                                              const SDLoc &DL,
 877                                                              SDValue N0,
 878                                                              SDValue N1) {
 879   // Currently this only tries to ensure we don't undo the GEP splits done by
 880   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
 881   // we check if the following transformation would be problematic:
 882   // (load/store (add, (add, x, offset1), offset2)) ->
 883   // (load/store (add, x, offset1+offset2)).
 884
 885   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
 886     return false;
 887
 888   if (N0.hasOneUse())
 889     return false;
 890
 891   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
 892   auto *C2 = dyn_cast<ConstantSDNode>(N1);
 893   if (!C1 || !C2)
 894     return false;
 895
 896   const APInt &C1APIntVal = C1->getAPIntValue();
 897   const APInt &C2APIntVal = C2->getAPIntValue();
 898   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
 899     return false;
 900
 901   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
 902   if (CombinedValueIntVal.getBitWidth() > 64)
 903     return false;
 904   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
 905
 906   for (SDNode *Node : N0->uses()) {
 907     auto LoadStore = dyn_cast<MemSDNode>(Node);
 908     if (LoadStore) {
 909       // Is x[offset2] already not a legal addressing mode? If so then
 910       // reassociating the constants breaks nothing (we test offset2 because
 911       // that's the one we hope to fold into the load or store).
 912       TargetLoweringBase::AddrMode AM;
 913       AM.HasBaseReg = true;
 914       AM.BaseOffs = C2APIntVal.getSExtValue();
 915       EVT VT = LoadStore->getMemoryVT();
 916       unsigned AS = LoadStore->getAddressSpace();
 917       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
 918       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 919         continue;
 920
 921       // Would x[offset1+offset2] still be a legal addressing mode?
 922       AM.BaseOffs = CombinedValue;
 923       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 924         return true;
 925     }
 926   }
 927
 928   return false;
 929 }
 930
 931 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
 932 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
 933 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
 934                                                SDValue N0, SDValue N1) {
 935   EVT VT = N0.getValueType();
 936
 937   if (N0.getOpcode() != Opc)
 938     return SDValue();
 939
 940   // Don't reassociate reductions.
 941   if (N0->getFlags().hasVectorReduction())
 942     return SDValue();
 943
 944   if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 945     if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 946       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
 947       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
 948         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 949       return SDValue();
 950     }
 951     if (N0.hasOneUse()) {
 952       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
 953       //              iff (op x, c1) has one use
 954       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 955       if (!OpNode.getNode())
 956         return SDValue();
 957       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 958     }
 959   }
 960   return SDValue();
 961 }
 962
 963 // Try to reassociate commutative binops.
 964 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 965                                     SDValue N1, SDNodeFlags Flags) {
 966   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
 967   // Don't reassociate reductions.
 968   if (Flags.hasVectorReduction())
 969     return SDValue();
 970
 971   // Floating-point reassociation is not allowed without loose FP math.
 972   if (N0.getValueType().isFloatingPoint() ||
 973       N1.getValueType().isFloatingPoint())
 974     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
 975       return SDValue();
 976
 977   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
 978     return Combined;
 979   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
 980     return Combined;
 981   return SDValue();
 982 }
 983
 984 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 985                                bool AddTo) {
 986   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 987   ++NodesCombined;
 988   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
 989              To[0].getNode()->dump(&DAG);
 990              dbgs() << " and " << NumTo - 1 << " other values\n");
 991   for (unsigned i = 0, e = NumTo; i != e; ++i)
 992     assert((!To[i].getNode() ||
 993             N->getValueType(i) == To[i].getValueType()) &&
 994            "Cannot combine value to value of different type!");
 995
 996   WorklistRemover DeadNodes(*this);
 997   DAG.ReplaceAllUsesWith(N, To);
 998   if (AddTo) {
 999     // Push the new nodes and any users onto the worklist
1000     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1001       if (To[i].getNode()) {
1002         AddToWorklist(To[i].getNode());
1003         AddUsersToWorklist(To[i].getNode());
1004       }
1005     }
1006   }
1007
1008   // Finally, if the node is now dead, remove it from the graph.  The node
1009   // may not be dead if the replacement process recursively simplified to
1010   // something else needing this node.
1011   if (N->use_empty())
1012     deleteAndRecombine(N);
1013   return SDValue(N, 0);
1014 }
1015
1016 void DAGCombiner::
1017 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1018   // Replace all uses.  If any nodes become isomorphic to other nodes and
1019   // are deleted, make sure to remove them from our worklist.
1020   WorklistRemover DeadNodes(*this);
1021   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1022
1023   // Push the new node and any (possibly new) users onto the worklist.
1024   AddToWorklist(TLO.New.getNode());
1025   AddUsersToWorklist(TLO.New.getNode());
1026
1027   // Finally, if the node is now dead, remove it from the graph.  The node
1028   // may not be dead if the replacement process recursively simplified to
1029   // something else needing this node.
1030   if (TLO.Old.getNode()->use_empty())
1031     deleteAndRecombine(TLO.Old.getNode());
1032 }
1033
1034 /// Check the specified integer node value to see if it can be simplified or if
1035 /// things it uses can be simplified by bit propagation. If so, return true.
1036 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1037                                        const APInt &DemandedElts) {
1038   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1039   KnownBits Known;
1040   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1041     return false;
1042
1043   // Revisit the node.
1044   AddToWorklist(Op.getNode());
1045
1046   // Replace the old value with the new one.
1047   ++NodesCombined;
1048   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1049              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1050              dbgs() << '\n');
1051
1052   CommitTargetLoweringOpt(TLO);
1053   return true;
1054 }
1055
1056 /// Check the specified vector node value to see if it can be simplified or
1057 /// if things it uses can be simplified as it only uses some of the elements.
1058 /// If so, return true.
1059 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1060                                              const APInt &DemandedElts,
1061                                              bool AssumeSingleUse) {
1062   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1063   APInt KnownUndef, KnownZero;
1064   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1065                                       TLO, 0, AssumeSingleUse))
1066     return false;
1067
1068   // Revisit the node.
1069   AddToWorklist(Op.getNode());
1070
1071   // Replace the old value with the new one.
1072   ++NodesCombined;
1073   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1074              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1075              dbgs() << '\n');
1076
1077   CommitTargetLoweringOpt(TLO);
1078   return true;
1079 }
1080
1081 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1082   SDLoc DL(Load);
1083   EVT VT = Load->getValueType(0);
1084   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1085
1086   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1087              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1088   WorklistRemover DeadNodes(*this);
1089   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1090   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1091   deleteAndRecombine(Load);
1092   AddToWorklist(Trunc.getNode());
1093 }
1094
1095 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1096   Replace = false;
1097   SDLoc DL(Op);
1098   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1099     LoadSDNode *LD = cast<LoadSDNode>(Op);
1100     EVT MemVT = LD->getMemoryVT();
1101     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1102                                                       : LD->getExtensionType();
1103     Replace = true;
1104     return DAG.getExtLoad(ExtType, DL, PVT,
1105                           LD->getChain(), LD->getBasePtr(),
1106                           MemVT, LD->getMemOperand());
1107   }
1108
1109   unsigned Opc = Op.getOpcode();
1110   switch (Opc) {
1111   default: break;
1112   case ISD::AssertSext:
1113     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1114       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1115     break;
1116   case ISD::AssertZext:
1117     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1118       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1119     break;
1120   case ISD::Constant: {
1121     unsigned ExtOpc =
1122       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1123     return DAG.getNode(ExtOpc, DL, PVT, Op);
1124   }
1125   }
1126
1127   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1128     return SDValue();
1129   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1130 }
1131
1132 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1133   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1134     return SDValue();
1135   EVT OldVT = Op.getValueType();
1136   SDLoc DL(Op);
1137   bool Replace = false;
1138   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1139   if (!NewOp.getNode())
1140     return SDValue();
1141   AddToWorklist(NewOp.getNode());
1142
1143   if (Replace)
1144     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1145   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1146                      DAG.getValueType(OldVT));
1147 }
1148
1149 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1150   EVT OldVT = Op.getValueType();
1151   SDLoc DL(Op);
1152   bool Replace = false;
1153   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1154   if (!NewOp.getNode())
1155     return SDValue();
1156   AddToWorklist(NewOp.getNode());
1157
1158   if (Replace)
1159     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1160   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1161 }
1162
1163 /// Promote the specified integer binary operation if the target indicates it is
1164 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1165 /// i32 since i16 instructions are longer.
1166 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1167   if (!LegalOperations)
1168     return SDValue();
1169
1170   EVT VT = Op.getValueType();
1171   if (VT.isVector() || !VT.isInteger())
1172     return SDValue();
1173
1174   // If operation type is 'undesirable', e.g. i16 on x86, consider
1175   // promoting it.
1176   unsigned Opc = Op.getOpcode();
1177   if (TLI.isTypeDesirableForOp(Opc, VT))
1178     return SDValue();
1179
1180   EVT PVT = VT;
1181   // Consult target whether it is a good idea to promote this operation and
1182   // what's the right type to promote it to.
1183   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1184     assert(PVT != VT && "Don't know what type to promote to!");
1185
1186     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1187
1188     bool Replace0 = false;
1189     SDValue N0 = Op.getOperand(0);
1190     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1191
1192     bool Replace1 = false;
1193     SDValue N1 = Op.getOperand(1);
1194     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1195     SDLoc DL(Op);
1196
1197     SDValue RV =
1198         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1199
1200     // We are always replacing N0/N1's use in N and only need
1201     // additional replacements if there are additional uses.
1202     Replace0 &= !N0->hasOneUse();
1203     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1204
1205     // Combine Op here so it is preserved past replacements.
1206     CombineTo(Op.getNode(), RV);
1207
1208     // If operands have a use ordering, make sure we deal with
1209     // predecessor first.
1210     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1211       std::swap(N0, N1);
1212       std::swap(NN0, NN1);
1213     }
1214
1215     if (Replace0) {
1216       AddToWorklist(NN0.getNode());
1217       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1218     }
1219     if (Replace1) {
1220       AddToWorklist(NN1.getNode());
1221       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1222     }
1223     return Op;
1224   }
1225   return SDValue();
1226 }
1227
1228 /// Promote the specified integer shift operation if the target indicates it is
1229 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1230 /// i32 since i16 instructions are longer.
1231 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1232   if (!LegalOperations)
1233     return SDValue();
1234
1235   EVT VT = Op.getValueType();
1236   if (VT.isVector() || !VT.isInteger())
1237     return SDValue();
1238
1239   // If operation type is 'undesirable', e.g. i16 on x86, consider
1240   // promoting it.
1241   unsigned Opc = Op.getOpcode();
1242   if (TLI.isTypeDesirableForOp(Opc, VT))
1243     return SDValue();
1244
1245   EVT PVT = VT;
1246   // Consult target whether it is a good idea to promote this operation and
1247   // what's the right type to promote it to.
1248   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1249     assert(PVT != VT && "Don't know what type to promote to!");
1250
1251     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1252
1253     bool Replace = false;
1254     SDValue N0 = Op.getOperand(0);
1255     SDValue N1 = Op.getOperand(1);
1256     if (Opc == ISD::SRA)
1257       N0 = SExtPromoteOperand(N0, PVT);
1258     else if (Opc == ISD::SRL)
1259       N0 = ZExtPromoteOperand(N0, PVT);
1260     else
1261       N0 = PromoteOperand(N0, PVT, Replace);
1262
1263     if (!N0.getNode())
1264       return SDValue();
1265
1266     SDLoc DL(Op);
1267     SDValue RV =
1268         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1269
1270     if (Replace)
1271       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1272
1273     // Deal with Op being deleted.
1274     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1275       return RV;
1276   }
1277   return SDValue();
1278 }
1279
1280 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1281   if (!LegalOperations)
1282     return SDValue();
1283
1284   EVT VT = Op.getValueType();
1285   if (VT.isVector() || !VT.isInteger())
1286     return SDValue();
1287
1288   // If operation type is 'undesirable', e.g. i16 on x86, consider
1289   // promoting it.
1290   unsigned Opc = Op.getOpcode();
1291   if (TLI.isTypeDesirableForOp(Opc, VT))
1292     return SDValue();
1293
1294   EVT PVT = VT;
1295   // Consult target whether it is a good idea to promote this operation and
1296   // what's the right type to promote it to.
1297   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1298     assert(PVT != VT && "Don't know what type to promote to!");
1299     // fold (aext (aext x)) -> (aext x)
1300     // fold (aext (zext x)) -> (zext x)
1301     // fold (aext (sext x)) -> (sext x)
1302     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1303     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1304   }
1305   return SDValue();
1306 }
1307
1308 bool DAGCombiner::PromoteLoad(SDValue Op) {
1309   if (!LegalOperations)
1310     return false;
1311
1312   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1313     return false;
1314
1315   EVT VT = Op.getValueType();
1316   if (VT.isVector() || !VT.isInteger())
1317     return false;
1318
1319   // If operation type is 'undesirable', e.g. i16 on x86, consider
1320   // promoting it.
1321   unsigned Opc = Op.getOpcode();
1322   if (TLI.isTypeDesirableForOp(Opc, VT))
1323     return false;
1324
1325   EVT PVT = VT;
1326   // Consult target whether it is a good idea to promote this operation and
1327   // what's the right type to promote it to.
1328   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1329     assert(PVT != VT && "Don't know what type to promote to!");
1330
1331     SDLoc DL(Op);
1332     SDNode *N = Op.getNode();
1333     LoadSDNode *LD = cast<LoadSDNode>(N);
1334     EVT MemVT = LD->getMemoryVT();
1335     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1336                                                       : LD->getExtensionType();
1337     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1338                                    LD->getChain(), LD->getBasePtr(),
1339                                    MemVT, LD->getMemOperand());
1340     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1341
1342     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1343                Result.getNode()->dump(&DAG); dbgs() << '\n');
1344     WorklistRemover DeadNodes(*this);
1345     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1346     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1347     deleteAndRecombine(N);
1348     AddToWorklist(Result.getNode());
1349     return true;
1350   }
1351   return false;
1352 }
1353
1354 /// Recursively delete a node which has no uses and any operands for
1355 /// which it is the only use.
1356 ///
1357 /// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes which have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
1360 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1361   if (!N->use_empty())
1362     return false;
1363
1364   SmallSetVector<SDNode *, 16> Nodes;
1365   Nodes.insert(N);
1366   do {
1367     N = Nodes.pop_back_val();
1368     if (!N)
1369       continue;
1370
1371     if (N->use_empty()) {
1372       for (const SDValue &ChildN : N->op_values())
1373         Nodes.insert(ChildN.getNode());
1374
1375       removeFromWorklist(N);
1376       DAG.DeleteNode(N);
1377     } else {
1378       AddToWorklist(N);
1379     }
1380   } while (!Nodes.empty());
1381   return true;
1382 }
1383
1384 //===----------------------------------------------------------------------===//
1385 //  Main DAG Combiner implementation
1386 //===----------------------------------------------------------------------===//
1387
1388 void DAGCombiner::Run(CombineLevel AtLevel) {
1389   // set the instance variables, so that the various visit routines may use it.
1390   Level = AtLevel;
1391   LegalOperations = Level >= AfterLegalizeVectorOps;
1392   LegalTypes = Level >= AfterLegalizeTypes;
1393
1394   WorklistInserter AddNodes(*this);
1395
1396   // Add all the dag nodes to the worklist.
1397   for (SDNode &Node : DAG.allnodes())
1398     AddToWorklist(&Node);
1399
1400   // Create a dummy node (which is not added to allnodes), that adds a reference
1401   // to the root node, preventing it from being deleted, and tracking any
1402   // changes of the root.
1403   HandleSDNode Dummy(DAG.getRoot());
1404
1405   // While we have a valid worklist entry node, try to combine it.
1406   while (SDNode *N = getNextWorklistEntry()) {
1407     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1408     // N is deleted from the DAG, since they too may now be dead or may have a
1409     // reduced number of uses, allowing other xforms.
1410     if (recursivelyDeleteUnusedNodes(N))
1411       continue;
1412
1413     WorklistRemover DeadNodes(*this);
1414
1415     // If this combine is running after legalizing the DAG, re-legalize any
1416     // nodes pulled off the worklist.
1417     if (Level == AfterLegalizeDAG) {
1418       SmallSetVector<SDNode *, 16> UpdatedNodes;
1419       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1420
1421       for (SDNode *LN : UpdatedNodes) {
1422         AddUsersToWorklist(LN);
1423         AddToWorklist(LN);
1424       }
1425       if (!NIsValid)
1426         continue;
1427     }
1428
1429     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1430
1431     // Add any operands of the new node which have not yet been combined to the
1432     // worklist as well. Because the worklist uniques things already, this
1433     // won't repeatedly process the same operand.
1434     CombinedNodes.insert(N);
1435     for (const SDValue &ChildN : N->op_values())
1436       if (!CombinedNodes.count(ChildN.getNode()))
1437         AddToWorklist(ChildN.getNode());
1438
1439     SDValue RV = combine(N);
1440
1441     if (!RV.getNode())
1442       continue;
1443
1444     ++NodesCombined;
1445
1446     // If we get back the same node we passed in, rather than a new node or
1447     // zero, we know that the node must have defined multiple values and
1448     // CombineTo was used.  Since CombineTo takes care of the worklist
1449     // mechanics for us, we have no work to do in this case.
1450     if (RV.getNode() == N)
1451       continue;
1452
1453     assert(N->getOpcode() != ISD::DELETED_NODE &&
1454            RV.getOpcode() != ISD::DELETED_NODE &&
1455            "Node was deleted but visit returned new node!");
1456
1457     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1458
1459     if (N->getNumValues() == RV.getNode()->getNumValues())
1460       DAG.ReplaceAllUsesWith(N, RV.getNode());
1461     else {
1462       assert(N->getValueType(0) == RV.getValueType() &&
1463              N->getNumValues() == 1 && "Type mismatch");
1464       DAG.ReplaceAllUsesWith(N, &RV);
1465     }
1466
1467     // Push the new node and any users onto the worklist
1468     AddToWorklist(RV.getNode());
1469     AddUsersToWorklist(RV.getNode());
1470
1471     // Finally, if the node is now dead, remove it from the graph.  The node
1472     // may not be dead if the replacement process recursively simplified to
1473     // something else needing this node. This will also take care of adding any
1474     // operands which have lost a user to the worklist.
1475     recursivelyDeleteUnusedNodes(N);
1476   }
1477
1478   // If the root changed (e.g. it was a dead load), update the root.
1479   DAG.setRoot(Dummy.getValue());
1480   DAG.RemoveDeadNodes();
1481 }
1482 /// Dispatch \p N to the visit routine for its opcode (null SDValue if none).
1483 SDValue DAGCombiner::visit(SDNode *N) {
1484   switch (N->getOpcode()) {
1485   default: break; // No generic combine for this opcode.
1486   case ISD::TokenFactor:        return visitTokenFactor(N);
1487   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1488   case ISD::ADD:                return visitADD(N);
1489   case ISD::SUB:                return visitSUB(N);
1490   case ISD::SADDSAT:
1491   case ISD::UADDSAT:            return visitADDSAT(N);
1492   case ISD::SSUBSAT:
1493   case ISD::USUBSAT:            return visitSUBSAT(N);
1494   case ISD::ADDC:               return visitADDC(N);
1495   case ISD::SADDO:
1496   case ISD::UADDO:              return visitADDO(N);
1497   case ISD::SUBC:               return visitSUBC(N);
1498   case ISD::SSUBO:
1499   case ISD::USUBO:              return visitSUBO(N);
1500   case ISD::ADDE:               return visitADDE(N);
1501   case ISD::ADDCARRY:           return visitADDCARRY(N);
1502   case ISD::SUBE:               return visitSUBE(N);
1503   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1504   case ISD::SMULFIX:
1505   case ISD::SMULFIXSAT:
1506   case ISD::UMULFIX:
1507   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1508   case ISD::MUL:                return visitMUL(N);
1509   case ISD::SDIV:               return visitSDIV(N);
1510   case ISD::UDIV:               return visitUDIV(N);
1511   case ISD::SREM:
1512   case ISD::UREM:               return visitREM(N);
1513   case ISD::MULHU:              return visitMULHU(N);
1514   case ISD::MULHS:              return visitMULHS(N);
1515   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1516   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1517   case ISD::SMULO:
1518   case ISD::UMULO:              return visitMULO(N);
1519   case ISD::SMIN:
1520   case ISD::SMAX:
1521   case ISD::UMIN:
1522   case ISD::UMAX:               return visitIMINMAX(N);
1523   case ISD::AND:                return visitAND(N);
1524   case ISD::OR:                 return visitOR(N);
1525   case ISD::XOR:                return visitXOR(N);
1526   case ISD::SHL:                return visitSHL(N);
1527   case ISD::SRA:                return visitSRA(N);
1528   case ISD::SRL:                return visitSRL(N);
1529   case ISD::ROTR:
1530   case ISD::ROTL:               return visitRotate(N);
1531   case ISD::FSHL:
1532   case ISD::FSHR:               return visitFunnelShift(N);
1533   case ISD::ABS:                return visitABS(N);
1534   case ISD::BSWAP:              return visitBSWAP(N);
1535   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1536   case ISD::CTLZ:               return visitCTLZ(N);
1537   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1538   case ISD::CTTZ:               return visitCTTZ(N);
1539   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1540   case ISD::CTPOP:              return visitCTPOP(N);
1541   case ISD::SELECT:             return visitSELECT(N);
1542   case ISD::VSELECT:            return visitVSELECT(N);
1543   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1544   case ISD::SETCC:              return visitSETCC(N);
1545   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1546   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1547   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1548   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1549   case ISD::AssertSext:
1550   case ISD::AssertZext:         return visitAssertExt(N);
1551   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1552   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1553   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1554   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1555   case ISD::BITCAST:            return visitBITCAST(N);
1556   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1557   case ISD::FADD:               return visitFADD(N);
1558   case ISD::FSUB:               return visitFSUB(N);
1559   case ISD::FMUL:               return visitFMUL(N);
1560   case ISD::FMA:                return visitFMA(N);
1561   case ISD::FDIV:               return visitFDIV(N);
1562   case ISD::FREM:               return visitFREM(N);
1563   case ISD::FSQRT:              return visitFSQRT(N);
1564   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1565   case ISD::FPOW:               return visitFPOW(N);
1566   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1567   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1568   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1569   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1570   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1571   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1572   case ISD::FNEG:               return visitFNEG(N);
1573   case ISD::FABS:               return visitFABS(N);
1574   case ISD::FFLOOR:             return visitFFLOOR(N);
1575   case ISD::FMINNUM:            return visitFMINNUM(N);
1576   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1577   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1578   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1579   case ISD::FCEIL:              return visitFCEIL(N);
1580   case ISD::FTRUNC:             return visitFTRUNC(N);
1581   case ISD::BRCOND:             return visitBRCOND(N);
1582   case ISD::BR_CC:              return visitBR_CC(N);
1583   case ISD::LOAD:               return visitLOAD(N);
1584   case ISD::STORE:              return visitSTORE(N);
1585   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1586   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1587   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1588   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1589   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1590   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1591   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1592   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1593   case ISD::MGATHER:            return visitMGATHER(N);
1594   case ISD::MLOAD:              return visitMLOAD(N);
1595   case ISD::MSCATTER:           return visitMSCATTER(N);
1596   case ISD::MSTORE:             return visitMSTORE(N);
1597   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1598   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1599   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1600   case ISD::VECREDUCE_FADD:
1601   case ISD::VECREDUCE_FMUL:
1602   case ISD::VECREDUCE_ADD:
1603   case ISD::VECREDUCE_MUL:
1604   case ISD::VECREDUCE_AND:
1605   case ISD::VECREDUCE_OR:
1606   case ISD::VECREDUCE_XOR:
1607   case ISD::VECREDUCE_SMAX:
1608   case ISD::VECREDUCE_SMIN:
1609   case ISD::VECREDUCE_UMAX:
1610   case ISD::VECREDUCE_UMIN:
1611   case ISD::VECREDUCE_FMAX:
1612   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1613   }
1614   return SDValue(); // Reached via 'default': nothing was combined.
1615 }
1616
1617 /// Try to combine \p N via visit(), then the target hook, then promotion, then
1618 /// CSE against a commuted twin. Returns a null SDValue if nothing applied.
1619 SDValue DAGCombiner::combine(SDNode *N) {
1620   SDValue RV = visit(N);
1621   if (RV.getNode())
1622     return RV;
1623
1624   assert(N->getOpcode() != ISD::DELETED_NODE &&
1625          "Node was deleted but visit returned NULL!");
1626
1627   // Give the target a chance, either because N is a target-specific node or
1628   // because the target asked to combine this generic opcode.
1629   const bool TargetWantsNode =
1630       N->getOpcode() >= ISD::BUILTIN_OP_END ||
1631       TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode());
1632   if (TargetWantsNode) {
1633     // Expose the DAG combiner to the target combiner impls.
1634     TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, Level, false, this);
1635     RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1636     if (RV.getNode())
1637       return RV;
1638   }
1639
1640   // Still nothing: try promoting the operation.
1641   switch (N->getOpcode()) {
1642   default: break;
1643   case ISD::ADD:
1644   case ISD::SUB:
1645   case ISD::MUL:
1646   case ISD::AND:
1647   case ISD::OR:
1648   case ISD::XOR:
1649     RV = PromoteIntBinOp(SDValue(N, 0));
1650     break;
1651   case ISD::SHL:
1652   case ISD::SRA:
1653   case ISD::SRL:
1654     RV = PromoteIntShiftOp(SDValue(N, 0));
1655     break;
1656   case ISD::SIGN_EXTEND:
1657   case ISD::ZERO_EXTEND:
1658   case ISD::ANY_EXTEND:
1659     RV = PromoteExtend(SDValue(N, 0));
1660     break;
1661   case ISD::LOAD:
1662     if (PromoteLoad(SDValue(N, 0)))
1663       RV = SDValue(N, 0);
1664     break;
1665   }
1666   if (RV.getNode())
1667     return RV;
1668
1669   // Last resort: if N is a single-result commutative binary node, reuse the
1670   // commuted form when that node already exists in the DAG.
1671   if (!TLI.isCommutativeBinOp(N->getOpcode()) || N->getNumValues() != 1)
1672     return RV;
1673   SDValue LHS = N->getOperand(0);
1674   SDValue RHS = N->getOperand(1);
1675   // Constant operands are canonicalized to RHS.
1676   if (LHS == RHS || (!isa<ConstantSDNode>(LHS) && isa<ConstantSDNode>(RHS)))
1677     return RV;
1678
1679   SDValue Swapped[] = {RHS, LHS};
1680   SDNode *Existing = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
1681                                          Swapped, N->getFlags());
1682   return Existing ? SDValue(Existing, 0) : RV;
1683 }
1684
1685 /// Return the chain operand (type MVT::Other) of \p N, or a null SDValue if
1686 /// there is none. Operand 0 is probed first, then the last, then the rest.
1687 static SDValue getInputChainForNode(SDNode *N) {
1688   const unsigned Count = N->getNumOperands();
1689   if (Count == 0)
1690     return SDValue();
1691   for (unsigned Idx : {0u, Count - 1})
1692     if (N->getOperand(Idx).getValueType() == MVT::Other)
1693       return N->getOperand(Idx);
1694   for (unsigned Idx = 1; Idx + 1 < Count; ++Idx)
1695     if (N->getOperand(Idx).getValueType() == MVT::Other)
1696       return N->getOperand(Idx);
1697   return SDValue();
1698 }
1699 /// Simplify TokenFactor \p N: drop redundant operands and inline nested TFs.
1700 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1701   // If N has two operands, where one has an input chain equal to the other,
1702   // the 'other' chain is redundant.
1703   if (N->getNumOperands() == 2) {
1704     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1705       return N->getOperand(0);
1706     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1707       return N->getOperand(1);
1708   }
1709
1710   // Don't simplify token factors if optnone.
1711   if (OptLevel == CodeGenOpt::None)
1712     return SDValue();
1713
1714   // If the sole user is a token factor, we should make sure we have a
1715   // chance to merge them together. This prevents TF chains from inhibiting
1716   // optimizations.
1717   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1718     AddToWorklist(*(N->use_begin()));
1719
1720   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1721   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1722   SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already pushed onto Ops.
1723   bool Changed = false;             // If we should replace this token factor.
1724
1725   // Start out with this token factor.
1726   TFs.push_back(N);
1727
1728   // Iterate through token factors.  The TFs list grows when new token factors
1729   // are encountered.
1730   for (unsigned i = 0; i < TFs.size(); ++i) {
1731     // Limit number of nodes to inline, to avoid quadratic compile times.
1732     // We have to add the outstanding Token Factors to Ops, otherwise we might
1733     // drop Ops from the resulting Token Factors.
1734     if (Ops.size() > TokenFactorInlineLimit) {
1735       for (unsigned j = i; j < TFs.size(); j++)
1736         Ops.emplace_back(TFs[j], 0);
1737       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1738       // combiner worklist later.
1739       TFs.resize(i);
1740       break;
1741     }
1742
1743     SDNode *TF = TFs[i];
1744     // Check each of the operands.
1745     for (const SDValue &Op : TF->op_values()) {
1746       switch (Op.getOpcode()) {
1747       case ISD::EntryToken:
1748         // Entry tokens don't need to be added to the list. They are
1749         // redundant.
1750         Changed = true;
1751         break;
1752
1753       case ISD::TokenFactor:
1754         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1755           // Queue up for processing.
1756           TFs.push_back(Op.getNode());
1757           Changed = true;
1758           break;
1759         }
1760         LLVM_FALLTHROUGH; // Multi-use or already-queued TFs stay plain operands.
1761
1762       default:
1763         // Only add if it isn't already in the list.
1764         if (SeenOps.insert(Op.getNode()).second)
1765           Ops.push_back(Op);
1766         else
1767           Changed = true;
1768         break;
1769       }
1770     }
1771   }
1772
1773   // Re-visit inlined Token Factors, to clean them up in case they have been
1774   // removed. Skip the first Token Factor, as this is the current node.
1775   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1776     AddToWorklist(TFs[i]);
1777
1778   // Remove Nodes that are chained to another node in the list. Do so
1779   // by walking up chains breadth-first stopping when we've seen
1780   // another operand. In general we must climb to the EntryNode, but we can exit
1781   // early if we find all remaining work is associated with just one operand as
1782   // no further pruning is possible.
1783
1784   // List of nodes to search through and original Ops from which they originate.
1785   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1786   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1787   SmallPtrSet<SDNode *, 16> SeenChains; // Chain nodes pushed during the walk.
1788   bool DidPruneOps = false; // Set once the walk reaches a node that is an Op.
1789
1790   unsigned NumLeftToConsider = 0; // Ops still in play for pruning.
1791   for (const SDValue &Op : Ops) {
1792     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1793     OpWorkCount.push_back(1);
1794   }
1795   // NOTE: from here on this lambda shadows the AddToWorklist used above.
1796   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1797     // If this is an Op, we can remove the op from the list. Re-mark any
1798     // search associated with it as from the current OpNumber.
1799     if (SeenOps.count(Op) != 0) {
1800       Changed = true;
1801       DidPruneOps = true;
1802       unsigned OrigOpNumber = 0;
1803       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1804         OrigOpNumber++;
1805       assert((OrigOpNumber != Ops.size()) &&
1806              "expected to find TokenFactor Operand");
1807       // Re-mark the not-yet-visited worklist entries from OrigOpNumber to
1808       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) { // OpNumber.
1809         if (Worklist[i].second == OrigOpNumber) {
1810           Worklist[i].second = OpNumber;
1811         }
1812       }
1813       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1814       OpWorkCount[OrigOpNumber] = 0;
1815       NumLeftToConsider--;
1816     }
1817     // Add if it's a new chain.
1818     if (SeenChains.insert(Op).second) {
1819       OpWorkCount[OpNumber]++;
1820       Worklist.push_back(std::make_pair(Op, OpNumber));
1821     }
1822   };
1823   // Walk the worklist; the i < 1024 bound caps how far the search goes.
1824   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1825     // We need to consider at least 2 Ops to prune.
1826     if (NumLeftToConsider <= 1)
1827       break;
1828     auto CurNode = Worklist[i].first;
1829     auto CurOpNumber = Worklist[i].second;
1830     assert((OpWorkCount[CurOpNumber] > 0) &&
1831            "Node should not appear in worklist");
1832     switch (CurNode->getOpcode()) {
1833     case ISD::EntryToken:
1834       // Hitting EntryToken is the only way for the search to terminate without
1835       // hitting another operand's search. Prevent us from marking this operand
1836       // considered: the increment compensates for the decrement that can
1837       // follow the switch when this operand's work count drops to zero.
1838       NumLeftToConsider++;
1839       break;
1840     case ISD::TokenFactor:
1841       for (const SDValue &Op : CurNode->op_values())
1842         AddToWorklist(i, Op.getNode(), CurOpNumber);
1843       break;
1844     case ISD::LIFETIME_START:
1845     case ISD::LIFETIME_END:
1846     case ISD::CopyFromReg:
1847     case ISD::CopyToReg: // For these opcodes operand 0 is followed as the chain.
1848       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1849       break;
1850     default:
1851       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1852         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1853       break;
1854     }
1855     OpWorkCount[CurOpNumber]--;
1856     if (OpWorkCount[CurOpNumber] == 0)
1857       NumLeftToConsider--;
1858   }
1859
1860   // If we've changed things around then replace token factor.
1861   if (Changed) {
1862     SDValue Result;
1863     if (Ops.empty()) {
1864       // The entry token is the only possible outcome.
1865       Result = DAG.getEntryNode();
1866     } else {
1867       if (DidPruneOps) {
1868         SmallVector<SDValue, 8> PrunedOps;
1869         // Keep only the Ops that were not reached while walking up chains.
1870         for (const SDValue &Op : Ops) {
1871           if (SeenChains.count(Op.getNode()) == 0)
1872             PrunedOps.push_back(Op);
1873         }
1874         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1875       } else {
1876         Result = DAG.getTokenFactor(SDLoc(N), Ops);
1877       }
1878     }
1879     return Result;
1880   }
1881   return SDValue();
1882 }
1883
1884 /// MERGE_VALUES can always be eliminated by forwarding uses to its operands.
1885 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1886   WorklistRemover DeadNodes(*this);
1887   // Queue the users first so they are tried again once they have new operands.
1888   AddUsersToWorklist(N);
1889   // A replacement may cause a different MERGE_VALUES to be CSE'd with N and
1890   // carry its uses along, so repeat until N has no uses and is safe to delete.
1891   for (;;) {
1892     // Replace everything in one call to avoid rewalking use lists.
1893     SmallVector<SDValue, 8> Replacements;
1894     for (const SDValue &Operand : N->op_values())
1895       Replacements.push_back(Operand);
1896     DAG.ReplaceAllUsesWith(N, Replacements.data());
1897     if (N->use_empty())
1898       break;
1899   }
1900   deleteAndRecombine(N);
1901   return SDValue(N, 0); // Return N so it doesn't get rechecked!
1902 }
1903
1904 /// Return \p N as a ConstantSDNode pointer when it is one and is not opaque;
1905 /// otherwise return nullptr.
1906 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1907   auto *C = dyn_cast<ConstantSDNode>(N);
1908   return (C && !C->isOpaque()) ? C : nullptr;
1909 }
1910
1911 /// Fold binary operator \p BO into a one-use select-of-constants operand:
1912 ///   binop (select Cond, CT, CF), CBO --> select Cond, CT op CBO, CF op CBO
1913 /// Returns the new select, or a null SDValue if the fold does not apply.
1914 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1915   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1916          "Unexpected binary operator");
1917
1918   // Integer or FP constant / constant vector, with opaque constants rejected.
1919   auto IsFoldableConstant = [&](SDValue V) {
1920     return isConstantOrConstantVector(V, /* NoOpaque */ true) ||
1921            isConstantFPBuildVectorOrConstantFP(V);
1922   };
1923   auto IsOneUseSelect = [](SDValue V) {
1924     return V.getOpcode() == ISD::SELECT && V.hasOneUse();
1925   };
1926
1927   // Don't do this unless the old select is going away. We want to eliminate
1928   // the binary operator, not replace a binop with a select.
1929   // TODO: Handle ISD::SELECT_CC.
1930   const unsigned SelOpNo = IsOneUseSelect(BO->getOperand(0)) ? 0 : 1;
1931   SDValue Sel = BO->getOperand(SelOpNo);
1932   if (!IsOneUseSelect(Sel))
1933     return SDValue();
1934
1935   SDValue CT = Sel.getOperand(1);
1936   SDValue CF = Sel.getOperand(2);
1937   if (!IsFoldableConstant(CT) || !IsFoldableConstant(CF))
1938     return SDValue();
1939
1940   // Bail out if any constants are opaque because we can't constant fold those.
1941   // The exception is "and" and "or" with either 0 or -1 in which case we can
1942   // propagate non constant operands into select. I.e.:
1943   // and (select Cond, 0, -1), X --> select Cond, 0, X
1944   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1945   auto IsZeroOrAllOnes = [](SDValue V) {
1946     return isNullOrNullSplat(V) || isAllOnesOrAllOnesSplat(V);
1947   };
1948   const unsigned BinOpcode = BO->getOpcode();
1949   const bool CanFoldNonConst =
1950       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1951       IsZeroOrAllOnes(CT) && IsZeroOrAllOnes(CF);
1952
1953   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1954   if (!CanFoldNonConst && !IsFoldableConstant(CBO))
1955     return SDValue();
1956
1957   // A shift's value and amount may have different VTs (e.g. on x86 the amount
1958   // is i8 regardless of the LHS type), so bail out if the operands are swapped
1959   // and the value types do not match.
1960   // TODO: check for a shift operation and correct VTs so the fold can still
1961   // be performed on x86 if needed.
1962   EVT VT = Sel.getValueType();
1963   if (SelOpNo && VT != CBO.getValueType())
1964     return SDValue();
1965
1966   // Pull the constant math into both arms of the select. Example:
1967   //   add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1968   SDLoc DL(Sel);
1969   auto ApplyBinOp = [&](SDValue Arm) {
1970     return SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, Arm)
1971                    : DAG.getNode(BinOpcode, DL, VT, Arm, CBO);
1972   };
1973   // Unless non-constants are allowed, each arm must fold to undef/constant.
1974   auto IsUsableArm = [&](SDValue V) {
1975     return CanFoldNonConst || V.isUndef() || IsFoldableConstant(V);
1976   };
1977   SDValue NewCT = ApplyBinOp(CT);
1978   if (!IsUsableArm(NewCT))
1979     return SDValue();
1980   SDValue NewCF = ApplyBinOp(CF);
1981   if (!IsUsableArm(NewCF))
1982     return SDValue();
1983
1984   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1985   SelectOp->setFlags(BO->getFlags());
1986   return SelectOp;
1987 }
1988
1989 /// Rewrite an add/sub of a constant and "zext i1 (seteq (X & 1), 0)" as a
1990 /// sub/add of the low bit "zext (X & 1)" and an adjusted constant.
1991 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1992   const unsigned Opcode = N->getOpcode();
1993   assert((Opcode == ISD::ADD || Opcode == ISD::SUB) && "Expecting add or sub");
1994   const bool IsAdd = Opcode == ISD::ADD;
1995
1996   // Match a constant operand and a zext operand for the math instruction:
1997   //   add Z, C
1998   //   sub C, Z
1999   SDValue C = N->getOperand(IsAdd ? 1 : 0);
2000   SDValue Z = N->getOperand(IsAdd ? 0 : 1);
2001   auto *CN = dyn_cast<ConstantSDNode>(C);
2002   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2003     return SDValue();
2004
2005   // The zext operand must be an i1 setcc.
2006   SDValue SetCC = Z.getOperand(0);
2007   if (SetCC.getOpcode() != ISD::SETCC || SetCC.getValueType() != MVT::i1)
2008     return SDValue();
2009
2010   // The compare must have the form: setcc (X & 1), 0, eq.
2011   SDValue Masked = SetCC.getOperand(0);
2012   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2013   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2014       Masked.getOpcode() != ISD::AND || !isOneConstant(Masked.getOperand(1)))
2015     return SDValue();
2016
2017   // Use the low bit directly and compensate by adjusting the constant:
2018   //   add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2019   //   sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2020   EVT VT = C.getValueType();
2021   SDLoc DL(N);
2022   SDValue LowBit = DAG.getZExtOrTrunc(Masked, DL, VT);
2023   const APInt &OldC = CN->getAPIntValue();
2024   SDValue C1 = DAG.getConstant(IsAdd ? OldC + 1 : OldC - 1, DL, VT);
2025   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2026 }
2027
2028 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand
2029 /// into a shift of the original value and an add with an adjusted constant.
2030 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2031   const unsigned Opcode = N->getOpcode();
2032   assert((Opcode == ISD::ADD || Opcode == ISD::SUB) && "Expecting add or sub");
2033   const bool IsAdd = Opcode == ISD::ADD;
2034
2035   // Match "add (srl), C" or "sub C, (srl)" with a constant operand C.
2036   // TODO - support non-uniform vector amounts.
2037   SDValue ConstantOp = N->getOperand(IsAdd ? 1 : 0);
2038   SDValue ShiftOp = N->getOperand(IsAdd ? 0 : 1);
2039   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2040   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2041     return SDValue();
2042
2043   // The shifted value must be a 'not' with no other users.
2044   SDValue Not = ShiftOp.getOperand(0);
2045   if (!(Not.hasOneUse() && isBitwiseNot(Not)))
2046     return SDValue();
2047
2048   // The shift must be moving the sign bit to the least-significant-bit.
2049   EVT VT = ShiftOp.getValueType();
2050   SDValue ShAmt = ShiftOp.getOperand(1);
2051   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2052   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2053     return SDValue();
2054
2055   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2056   //   add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2057   //   sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2058   SDLoc DL(N);
2059   const unsigned ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2060   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2061   const APInt &OldC = C->getAPIntValue();
2062   SDValue NewC = DAG.getConstant(IsAdd ? OldC + 1 : OldC - 1, DL, VT);
2063   return DAG.getNode(ISD::ADD, DL, VT, NewShift, NewC);
2064 }
2065
2066 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2067 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2068 /// are no common bits set in the operands).
2069 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2070   SDValue N0 = N->getOperand(0);
2071   SDValue N1 = N->getOperand(1);
2072   EVT VT = N0.getValueType();
2073   SDLoc DL(N);
2074
2075   // fold vector ops
2076   if (VT.isVector()) {
2077     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2078       return FoldedVOp;
2079
2080     // fold (add x, 0) -> x, vector edition
2081     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2082       return N0;
2083     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2084       return N1;
2085   }
2086
2087   // fold (add x, undef) -> undef
2088   if (N0.isUndef())
2089     return N0;
2090
2091   if (N1.isUndef())
2092     return N1;
2093
2094   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2095     // canonicalize constant to RHS
2096     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2097       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2098     // fold (add c1, c2) -> c1+c2
2099     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2100                                       N1.getNode());
2101   }
2102
2103   // fold (add x, 0) -> x
2104   if (isNullConstant(N1))
2105     return N0;
2106
2107   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2108     // fold ((A-c1)+c2) -> (A+(c2-c1))
2109     if (N0.getOpcode() == ISD::SUB &&
2110         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2111       SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2112                                                N0.getOperand(1).getNode());
2113       assert(Sub && "Constant folding failed");
2114       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2115     }
2116
2117     // fold ((c1-A)+c2) -> (c1+c2)-A
2118     if (N0.getOpcode() == ISD::SUB &&
2119         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2120       SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2121                                                N0.getOperand(0).getNode());
2122       assert(Add && "Constant folding failed");
2123       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2124     }
2125
2126     // add (sext i1 X), 1 -> zext (not i1 X)
2127     // We don't transform this pattern:
2128     //   add (zext i1 X), -1 -> sext (not i1 X)
2129     // because most (?) targets generate better code for the zext form.
2130     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2131         isOneOrOneSplat(N1)) {
2132       SDValue X = N0.getOperand(0);
2133       if ((!LegalOperations ||
2134            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2135             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2136           X.getScalarValueSizeInBits() == 1) {
2137         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2138         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2139       }
2140     }
2141
2142     // Undo the add -> or combine to merge constant offsets from a frame index.
2143     if (N0.getOpcode() == ISD::OR &&
2144         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2145         isa<ConstantSDNode>(N0.getOperand(1)) &&
2146         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2147       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2148       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2149     }
2150   }
2151
2152   if (SDValue NewSel = foldBinOpIntoSelect(N))
2153     return NewSel;
2154
2155   // reassociate add
2156   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2157     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2158       return RADD;
2159   }
2160   // fold ((0-A) + B) -> B-A
2161   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2162     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2163
2164   // fold (A + (0-B)) -> A-B
2165   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2166     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2167
2168   // fold (A+(B-A)) -> B
2169   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2170     return N1.getOperand(0);
2171
2172   // fold ((B-A)+A) -> B
2173   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2174     return N0.getOperand(0);
2175
2176   // fold ((A-B)+(C-A)) -> (C-B)
2177   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2178       N0.getOperand(0) == N1.getOperand(1))
2179     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2180                        N0.getOperand(1));
2181
2182   // fold ((A-B)+(B-C)) -> (A-C)
2183   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2184       N0.getOperand(1) == N1.getOperand(0))
2185     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2186                        N1.getOperand(1));
2187
2188   // fold (A+(B-(A+C))) to (B-C)
2189   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2190       N0 == N1.getOperand(1).getOperand(0))
2191     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2192                        N1.getOperand(1).getOperand(1));
2193
2194   // fold (A+(B-(C+A))) to (B-C)
2195   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2196       N0 == N1.getOperand(1).getOperand(1))
2197     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2198                        N1.getOperand(1).getOperand(0));
2199
2200   // fold (A+((B-A)+or-C)) to (B+or-C)
2201   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2202       N1.getOperand(0).getOpcode() == ISD::SUB &&
2203       N0 == N1.getOperand(0).getOperand(1))
2204     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2205                        N1.getOperand(1));
2206
2207   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2208   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2209     SDValue N00 = N0.getOperand(0);
2210     SDValue N01 = N0.getOperand(1);
2211     SDValue N10 = N1.getOperand(0);
2212     SDValue N11 = N1.getOperand(1);
2213
2214     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2215       return DAG.getNode(ISD::SUB, DL, VT,
2216                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2217                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2218   }
2219
2220   // fold (add (umax X, C), -C) --> (usubsat X, C)
2221   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2222     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2223       return (!Max && !Op) ||
2224              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2225     };
2226     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2227                                   /*AllowUndefs*/ true))
2228       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2229                          N0.getOperand(1));
2230   }
2231
2232   if (SimplifyDemandedBits(SDValue(N, 0)))
2233     return SDValue(N, 0);
2234
2235   if (isOneOrOneSplat(N1)) {
2236     // fold (add (xor a, -1), 1) -> (sub 0, a)
2237     if (isBitwiseNot(N0))
2238       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2239                          N0.getOperand(0));
2240
2241     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2242     if (N0.getOpcode() == ISD::ADD ||
2243         N0.getOpcode() == ISD::UADDO ||
2244         N0.getOpcode() == ISD::SADDO) {
2245       SDValue A, Xor;
2246
2247       if (isBitwiseNot(N0.getOperand(0))) {
2248         A = N0.getOperand(1);
2249         Xor = N0.getOperand(0);
2250       } else if (isBitwiseNot(N0.getOperand(1))) {
2251         A = N0.getOperand(0);
2252         Xor = N0.getOperand(1);
2253       }
2254
2255       if (Xor)
2256         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2257     }
2258
2259     // Look for:
2260     //   add (add x, y), 1
2261     // And if the target does not like this form then turn into:
2262     //   sub y, (xor x, -1)
2263     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2264         N0.getOpcode() == ISD::ADD) {
2265       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2266                                 DAG.getAllOnesConstant(DL, VT));
2267       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2268     }
2269   }
2270
2271   // (x - y) + -1  ->  add (xor y, -1), x
2272   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2273       isAllOnesOrAllOnesSplat(N1)) {
2274     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2275     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2276   }
2277
2278   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2279     return Combined;
2280
2281   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2282     return Combined;
2283
2284   return SDValue();
2285 }
2286
2287 SDValue DAGCombiner::visitADD(SDNode *N) {
2288   SDValue N0 = N->getOperand(0);
2289   SDValue N1 = N->getOperand(1);
2290   EVT VT = N0.getValueType();
2291   SDLoc DL(N);
2292
2293   if (SDValue Combined = visitADDLike(N))
2294     return Combined;
2295
2296   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2297     return V;
2298
2299   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2300     return V;
2301
2302   // fold (a+b) -> (a|b) iff a and b share no bits.
2303   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2304       DAG.haveNoCommonBitsSet(N0, N1))
2305     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2306
2307   return SDValue();
2308 }
2309
2310 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2311   unsigned Opcode = N->getOpcode();
2312   SDValue N0 = N->getOperand(0);
2313   SDValue N1 = N->getOperand(1);
2314   EVT VT = N0.getValueType();
2315   SDLoc DL(N);
2316
2317   // fold vector ops
2318   if (VT.isVector()) {
2319     // TODO SimplifyVBinOp
2320
2321     // fold (add_sat x, 0) -> x, vector edition
2322     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2323       return N0;
2324     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2325       return N1;
2326   }
2327
2328   // fold (add_sat x, undef) -> -1
2329   if (N0.isUndef() || N1.isUndef())
2330     return DAG.getAllOnesConstant(DL, VT);
2331
2332   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2333     // canonicalize constant to RHS
2334     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2335       return DAG.getNode(Opcode, DL, VT, N1, N0);
2336     // fold (add_sat c1, c2) -> c3
2337     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2338                                       N1.getNode());
2339   }
2340
2341   // fold (add_sat x, 0) -> x
2342   if (isNullConstant(N1))
2343     return N0;
2344
2345   // If it cannot overflow, transform into an add.
2346   if (Opcode == ISD::UADDSAT)
2347     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2348       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2349
2350   return SDValue();
2351 }
2352
2353 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2354   bool Masked = false;
2355
2356   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2357   while (true) {
2358     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2359       V = V.getOperand(0);
2360       continue;
2361     }
2362
2363     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2364       Masked = true;
2365       V = V.getOperand(0);
2366       continue;
2367     }
2368
2369     break;
2370   }
2371
2372   // If this is not a carry, return.
2373   if (V.getResNo() != 1)
2374     return SDValue();
2375
2376   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2377       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2378     return SDValue();
2379
2380   EVT VT = V.getNode()->getValueType(0);
2381   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2382     return SDValue();
2383
2384   // If the result is masked, then no matter what kind of bool it is we can
2385   // return. If it isn't, then we need to make sure the bool type is either 0 or
2386   // 1 and not other values.
2387   if (Masked ||
2388       TLI.getBooleanContents(V.getValueType()) ==
2389           TargetLoweringBase::ZeroOrOneBooleanContent)
2390     return V;
2391
2392   return SDValue();
2393 }
2394
2395 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2396 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2397 /// the opcode and bypass the mask operation.
2398 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2399                                  SelectionDAG &DAG, const SDLoc &DL) {
2400   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2401     return SDValue();
2402
2403   EVT VT = N0.getValueType();
2404   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2405     return SDValue();
2406
2407   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2408   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2409   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2410 }
2411
2412 /// Helper for doing combines based on N0 and N1 being added to each other.
2413 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2414                                           SDNode *LocReference) {
2415   EVT VT = N0.getValueType();
2416   SDLoc DL(LocReference);
2417
2418   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2419   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2420       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2421     return DAG.getNode(ISD::SUB, DL, VT, N0,
2422                        DAG.getNode(ISD::SHL, DL, VT,
2423                                    N1.getOperand(0).getOperand(1),
2424                                    N1.getOperand(1)));
2425
2426   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2427     return V;
2428
2429   // Look for:
2430   //   add (add x, 1), y
2431   // And if the target does not like this form then turn into:
2432   //   sub y, (xor x, -1)
2433   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2434       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2435     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2436                               DAG.getAllOnesConstant(DL, VT));
2437     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2438   }
2439
2440   // Hoist one-use subtraction by non-opaque constant:
2441   //   (x - C) + y  ->  (x + y) - C
2442   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2443   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2444       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2445     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2446     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2447   }
2448   // Hoist one-use subtraction from non-opaque constant:
2449   //   (C - x) + y  ->  (y - x) + C
2450   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2451       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2452     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2453     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2454   }
2455
2456   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2457   // rather than 'add 0/-1' (the zext should get folded).
2458   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2459   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2460       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2461       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2462     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2463     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2464   }
2465
2466   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2467   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2468     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2469     if (TN->getVT() == MVT::i1) {
2470       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2471                                  DAG.getConstant(1, DL, VT));
2472       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2473     }
2474   }
2475
2476   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2477   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2478       N1.getResNo() == 0)
2479     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2480                        N0, N1.getOperand(0), N1.getOperand(2));
2481
2482   // (add X, Carry) -> (addcarry X, 0, Carry)
2483   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2484     if (SDValue Carry = getAsCarry(TLI, N1))
2485       return DAG.getNode(ISD::ADDCARRY, DL,
2486                          DAG.getVTList(VT, Carry.getValueType()), N0,
2487                          DAG.getConstant(0, DL, VT), Carry);
2488
2489   return SDValue();
2490 }
2491
2492 SDValue DAGCombiner::visitADDC(SDNode *N) {
2493   SDValue N0 = N->getOperand(0);
2494   SDValue N1 = N->getOperand(1);
2495   EVT VT = N0.getValueType();
2496   SDLoc DL(N);
2497
2498   // If the flag result is dead, turn this into an ADD.
2499   if (!N->hasAnyUseOfValue(1))
2500     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2501                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2502
2503   // canonicalize constant to RHS.
2504   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2505   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2506   if (N0C && !N1C)
2507     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2508
2509   // fold (addc x, 0) -> x + no carry out
2510   if (isNullConstant(N1))
2511     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2512                                         DL, MVT::Glue));
2513
2514   // If it cannot overflow, transform into an add.
2515   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2516     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2517                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2518
2519   return SDValue();
2520 }
2521
2522 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2523                            SelectionDAG &DAG, const TargetLowering &TLI) {
2524   EVT VT = V.getValueType();
2525
2526   SDValue Cst;
2527   switch (TLI.getBooleanContents(VT)) {
2528   case TargetLowering::ZeroOrOneBooleanContent:
2529   case TargetLowering::UndefinedBooleanContent:
2530     Cst = DAG.getConstant(1, DL, VT);
2531     break;
2532   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2533     Cst = DAG.getAllOnesConstant(DL, VT);
2534     break;
2535   }
2536
2537   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2538 }
2539
2540 /**
2541  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2542  * then the flip also occurs if computing the inverse is the same cost.
2543  * This function returns an empty SDValue in case it cannot flip the boolean
2544  * without increasing the cost of the computation. If you want to flip a boolean
2545  * no matter what, use flipBoolean.
2546  */
2547 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2548                                   const TargetLowering &TLI,
2549                                   bool Force) {
2550   if (Force && isa<ConstantSDNode>(V))
2551     return flipBoolean(V, SDLoc(V), DAG, TLI);
2552
2553   if (V.getOpcode() != ISD::XOR)
2554     return SDValue();
2555
2556   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2557   if (!Const)
2558     return SDValue();
2559
2560   EVT VT = V.getValueType();
2561
2562   bool IsFlip = false;
2563   switch(TLI.getBooleanContents(VT)) {
2564     case TargetLowering::ZeroOrOneBooleanContent:
2565       IsFlip = Const->isOne();
2566       break;
2567     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2568       IsFlip = Const->isAllOnesValue();
2569       break;
2570     case TargetLowering::UndefinedBooleanContent:
2571       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2572       break;
2573   }
2574
2575   if (IsFlip)
2576     return V.getOperand(0);
2577   if (Force)
2578     return flipBoolean(V, SDLoc(V), DAG, TLI);
2579   return SDValue();
2580 }
2581
2582 SDValue DAGCombiner::visitADDO(SDNode *N) {
2583   SDValue N0 = N->getOperand(0);
2584   SDValue N1 = N->getOperand(1);
2585   EVT VT = N0.getValueType();
2586   bool IsSigned = (ISD::SADDO == N->getOpcode());
2587
2588   EVT CarryVT = N->getValueType(1);
2589   SDLoc DL(N);
2590
2591   // If the flag result is dead, turn this into an ADD.
2592   if (!N->hasAnyUseOfValue(1))
2593     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2594                      DAG.getUNDEF(CarryVT));
2595
2596   // canonicalize constant to RHS.
2597   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2598       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2599     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2600
2601   // fold (addo x, 0) -> x + no carry out
2602   if (isNullOrNullSplat(N1))
2603     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2604
2605   if (!IsSigned) {
2606     // If it cannot overflow, transform into an add.
2607     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2608       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2609                        DAG.getConstant(0, DL, CarryVT));
2610
2611     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2612     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2613       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2614                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2615       return CombineTo(N, Sub,
2616                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2617     }
2618
2619     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2620       return Combined;
2621
2622     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2623       return Combined;
2624   }
2625
2626   return SDValue();
2627 }
2628
2629 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2630   EVT VT = N0.getValueType();
2631   if (VT.isVector())
2632     return SDValue();
2633
2634   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2635   // If Y + 1 cannot overflow.
2636   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2637     SDValue Y = N1.getOperand(0);
2638     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2639     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2640       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2641                          N1.getOperand(2));
2642   }
2643
2644   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2645   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2646     if (SDValue Carry = getAsCarry(TLI, N1))
2647       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2648                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2649
2650   return SDValue();
2651 }
2652
2653 SDValue DAGCombiner::visitADDE(SDNode *N) {
2654   SDValue N0 = N->getOperand(0);
2655   SDValue N1 = N->getOperand(1);
2656   SDValue CarryIn = N->getOperand(2);
2657
2658   // canonicalize constant to RHS
2659   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2660   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2661   if (N0C && !N1C)
2662     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2663                        N1, N0, CarryIn);
2664
2665   // fold (adde x, y, false) -> (addc x, y)
2666   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2667     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2668
2669   return SDValue();
2670 }
2671
2672 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2673   SDValue N0 = N->getOperand(0);
2674   SDValue N1 = N->getOperand(1);
2675   SDValue CarryIn = N->getOperand(2);
2676   SDLoc DL(N);
2677
2678   // canonicalize constant to RHS
2679   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2680   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2681   if (N0C && !N1C)
2682     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2683
2684   // fold (addcarry x, y, false) -> (uaddo x, y)
2685   if (isNullConstant(CarryIn)) {
2686     if (!LegalOperations ||
2687         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2688       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2689   }
2690
2691   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2692   if (isNullConstant(N0) && isNullConstant(N1)) {
2693     EVT VT = N0.getValueType();
2694     EVT CarryVT = CarryIn.getValueType();
2695     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2696     AddToWorklist(CarryExt.getNode());
2697     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2698                                     DAG.getConstant(1, DL, VT)),
2699                      DAG.getConstant(0, DL, CarryVT));
2700   }
2701
2702   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2703     return Combined;
2704
2705   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2706     return Combined;
2707
2708   return SDValue();
2709 }
2710
2711 /**
2712  * If we are facing some sort of diamond carry propapagtion pattern try to
2713  * break it up to generate something like:
2714  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2715  *
2716  * The end result is usually an increase in operation required, but because the
2717  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2718  *
2719  * Patterns typically look something like
2720  *            (uaddo A, B)
2721  *             /       \
2722  *          Carry      Sum
2723  *            |          \
2724  *            | (addcarry *, 0, Z)
2725  *            |       /
2726  *             \   Carry
2727  *              |   /
2728  * (addcarry X, *, *)
2729  *
2730  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2731  * produce a combine with a single path for carry propagation.
2732  */
2733 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2734                                       SDValue X, SDValue Carry0, SDValue Carry1,
2735                                       SDNode *N) {
2736   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2737     return SDValue();
2738   if (Carry1.getOpcode() != ISD::UADDO)
2739     return SDValue();
2740
2741   SDValue Z;
2742
2743   /**
2744    * First look for a suitable Z. It will present itself in the form of
2745    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2746    */
2747   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2748       isNullConstant(Carry0.getOperand(1))) {
2749     Z = Carry0.getOperand(2);
2750   } else if (Carry0.getOpcode() == ISD::UADDO &&
2751              isOneConstant(Carry0.getOperand(1))) {
2752     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2753     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2754   } else {
2755     // We couldn't find a suitable Z.
2756     return SDValue();
2757   }
2758
2759
2760   auto cancelDiamond = [&](SDValue A,SDValue B) {
2761     SDLoc DL(N);
2762     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2763     Combiner.AddToWorklist(NewY.getNode());
2764     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2765                        DAG.getConstant(0, DL, X.getValueType()),
2766                        NewY.getValue(1));
2767   };
2768
2769   /**
2770    *      (uaddo A, B)
2771    *           |
2772    *          Sum
2773    *           |
2774    * (addcarry *, 0, Z)
2775    */
2776   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2777     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2778   }
2779
2780   /**
2781    * (addcarry A, 0, Z)
2782    *         |
2783    *        Sum
2784    *         |
2785    *  (uaddo *, B)
2786    */
2787   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2788     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2789   }
2790
2791   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2792     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2793   }
2794
2795   return SDValue();
2796 }
2797
2798 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2799                                        SDNode *N) {
2800   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2801   if (isBitwiseNot(N0))
2802     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2803       SDLoc DL(N);
2804       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2805                                 N0.getOperand(0), NotC);
2806       return CombineTo(N, Sub,
2807                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2808     }
2809
2810   // Iff the flag result is dead:
2811   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2812   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2813   // or the dependency between the instructions.
2814   if ((N0.getOpcode() == ISD::ADD ||
2815        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2816         N0.getValue(1) != CarryIn)) &&
2817       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2818     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2819                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2820
2821   /**
2822    * When one of the addcarry argument is itself a carry, we may be facing
2823    * a diamond carry propagation. In which case we try to transform the DAG
2824    * to ensure linear carry propagation if that is possible.
2825    */
2826   if (auto Y = getAsCarry(TLI, N1)) {
2827     // Because both are carries, Y and Z can be swapped.
2828     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2829       return R;
2830     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
2831       return R;
2832   }
2833
2834   return SDValue();
2835 }
2836
2837 // Since it may not be valid to emit a fold to zero for vector initializers
2838 // check if we can before folding.
2839 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2840                              SelectionDAG &DAG, bool LegalOperations) {
2841   if (!VT.isVector())
2842     return DAG.getConstant(0, DL, VT);
2843   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2844     return DAG.getConstant(0, DL, VT);
2845   return SDValue();
2846 }
2847
2848 SDValue DAGCombiner::visitSUB(SDNode *N) {
2849   SDValue N0 = N->getOperand(0);
2850   SDValue N1 = N->getOperand(1);
2851   EVT VT = N0.getValueType();
2852   SDLoc DL(N);
2853
2854   // fold vector ops
2855   if (VT.isVector()) {
2856     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2857       return FoldedVOp;
2858
2859     // fold (sub x, 0) -> x, vector edition
2860     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2861       return N0;
2862   }
2863
2864   // fold (sub x, x) -> 0
2865   // FIXME: Refactor this and xor and other similar operations together.
2866   if (N0 == N1)
2867     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2868   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2869       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2870     // fold (sub c1, c2) -> c1-c2
2871     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2872                                       N1.getNode());
2873   }
2874
2875   if (SDValue NewSel = foldBinOpIntoSelect(N))
2876     return NewSel;
2877
2878   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2879
2880   // fold (sub x, c) -> (add x, -c)
2881   if (N1C) {
2882     return DAG.getNode(ISD::ADD, DL, VT, N0,
2883                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2884   }
2885
2886   if (isNullOrNullSplat(N0)) {
2887     unsigned BitWidth = VT.getScalarSizeInBits();
2888     // Right-shifting everything out but the sign bit followed by negation is
2889     // the same as flipping arithmetic/logical shift type without the negation:
2890     // -(X >>u 31) -> (X >>s 31)
2891     // -(X >>s 31) -> (X >>u 31)
2892     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2893       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2894       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
2895         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2896         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2897           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2898       }
2899     }
2900
2901     // 0 - X --> 0 if the sub is NUW.
2902     if (N->getFlags().hasNoUnsignedWrap())
2903       return N0;
2904
2905     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2906       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2907       // N1 must be 0 because negating the minimum signed value is undefined.
2908       if (N->getFlags().hasNoSignedWrap())
2909         return N0;
2910
2911       // 0 - X --> X if X is 0 or the minimum signed value.
2912       return N1;
2913     }
2914   }
2915
2916   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2917   if (isAllOnesOrAllOnesSplat(N0))
2918     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2919
2920   // fold (A - (0-B)) -> A+B
2921   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2922     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2923
2924   // fold A-(A-B) -> B
2925   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2926     return N1.getOperand(1);
2927
2928   // fold (A+B)-A -> B
2929   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2930     return N0.getOperand(1);
2931
2932   // fold (A+B)-B -> A
2933   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2934     return N0.getOperand(0);
2935
2936   // fold (A+C1)-C2 -> A+(C1-C2)
2937   if (N0.getOpcode() == ISD::ADD &&
2938       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2939       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2940     SDValue NewC = DAG.FoldConstantArithmetic(
2941         ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2942     assert(NewC && "Constant folding failed");
2943     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
2944   }
2945
2946   // fold C2-(A+C1) -> (C2-C1)-A
2947   if (N1.getOpcode() == ISD::ADD) {
2948     SDValue N11 = N1.getOperand(1);
2949     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2950         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2951       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2952                                                 N11.getNode());
2953       assert(NewC && "Constant folding failed");
2954       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2955     }
2956   }
2957
2958   // fold (A-C1)-C2 -> A-(C1+C2)
2959   if (N0.getOpcode() == ISD::SUB &&
2960       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2961       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2962     SDValue NewC = DAG.FoldConstantArithmetic(
2963         ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2964     assert(NewC && "Constant folding failed");
2965     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
2966   }
2967
2968   // fold (c1-A)-c2 -> (c1-c2)-A
2969   if (N0.getOpcode() == ISD::SUB &&
2970       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2971       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
2972     SDValue NewC = DAG.FoldConstantArithmetic(
2973         ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
2974     assert(NewC && "Constant folding failed");
2975     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
2976   }
2977
2978   // fold ((A+(B+or-C))-B) -> A+or-C
2979   if (N0.getOpcode() == ISD::ADD &&
2980       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2981        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2982       N0.getOperand(1).getOperand(0) == N1)
2983     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2984                        N0.getOperand(1).getOperand(1));
2985
2986   // fold ((A+(C+B))-B) -> A+C
2987   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2988       N0.getOperand(1).getOperand(1) == N1)
2989     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2990                        N0.getOperand(1).getOperand(0));
2991
2992   // fold ((A-(B-C))-C) -> A-B
2993   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2994       N0.getOperand(1).getOperand(1) == N1)
2995     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2996                        N0.getOperand(1).getOperand(0));
2997
2998   // fold (A-(B-C)) -> A+(C-B)
2999   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3000     return DAG.getNode(ISD::ADD, DL, VT, N0,
3001                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3002                                    N1.getOperand(0)));
3003
3004   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3005   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3006     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3007         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3008       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3009                                 N1.getOperand(0).getOperand(1),
3010                                 N1.getOperand(1));
3011       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3012     }
3013     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3014         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3015       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3016                                 N1.getOperand(0),
3017                                 N1.getOperand(1).getOperand(1));
3018       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3019     }
3020   }
3021
3022   // If either operand of a sub is undef, the result is undef
3023   if (N0.isUndef())
3024     return N0;
3025   if (N1.isUndef())
3026     return N1;
3027
3028   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3029     return V;
3030
3031   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3032     return V;
3033
3034   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3035     return V;
3036
3037   // (x - y) - 1  ->  add (xor y, -1), x
3038   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3039     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3040                               DAG.getAllOnesConstant(DL, VT));
3041     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3042   }
3043
3044   // Look for:
3045   //   sub y, (xor x, -1)
3046   // And if the target does not like this form then turn into:
3047   //   add (add x, y), 1
3048   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3049     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3050     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3051   }
3052
3053   // Hoist one-use addition by non-opaque constant:
3054   //   (x + C) - y  ->  (x - y) + C
3055   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3056       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3057     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3058     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3059   }
3060   // y - (x + C)  ->  (y - x) - C
3061   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3062       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3063     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3064     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3065   }
3066   // (x - C) - y  ->  (x - y) - C
3067   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3068   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3069       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3070     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3071     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3072   }
3073   // (C - x) - y  ->  C - (x + y)
3074   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3075       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3076     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3077     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3078   }
3079
3080   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3081   // rather than 'sub 0/1' (the sext should get folded).
3082   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3083   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3084       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3085       TLI.getBooleanContents(VT) ==
3086           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3087     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3088     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3089   }
3090
3091   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3092   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3093     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3094       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3095       SDValue S0 = N1.getOperand(0);
3096       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3097         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3098         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3099           if (C->getAPIntValue() == (OpSizeInBits - 1))
3100             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3101       }
3102     }
3103   }
3104
3105   // If the relocation model supports it, consider symbol offsets.
3106   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3107     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3108       // fold (sub Sym, c) -> Sym-c
3109       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3110         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3111                                     GA->getOffset() -
3112                                         (uint64_t)N1C->getSExtValue());
3113       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3114       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3115         if (GA->getGlobal() == GB->getGlobal())
3116           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3117                                  DL, VT);
3118     }
3119
3120   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3121   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3122     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3123     if (TN->getVT() == MVT::i1) {
3124       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3125                                  DAG.getConstant(1, DL, VT));
3126       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3127     }
3128   }
3129
3130   // Prefer an add for more folding potential and possibly better codegen:
3131   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3132   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3133     SDValue ShAmt = N1.getOperand(1);
3134     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3135     if (ShAmtC &&
3136         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3137       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3138       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3139     }
3140   }
3141
3142   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3143     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3144     if (SDValue Carry = getAsCarry(TLI, N0)) {
3145       SDValue X = N1;
3146       SDValue Zero = DAG.getConstant(0, DL, VT);
3147       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3148       return DAG.getNode(ISD::ADDCARRY, DL,
3149                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3150                          Carry);
3151     }
3152   }
3153
3154   return SDValue();
3155 }
3156
3157 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3158   SDValue N0 = N->getOperand(0);
3159   SDValue N1 = N->getOperand(1);
3160   EVT VT = N0.getValueType();
3161   SDLoc DL(N);
3162
3163   // fold vector ops
3164   if (VT.isVector()) {
3165     // TODO SimplifyVBinOp
3166
3167     // fold (sub_sat x, 0) -> x, vector edition
3168     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3169       return N0;
3170   }
3171
3172   // fold (sub_sat x, undef) -> 0
3173   if (N0.isUndef() || N1.isUndef())
3174     return DAG.getConstant(0, DL, VT);
3175
3176   // fold (sub_sat x, x) -> 0
3177   if (N0 == N1)
3178     return DAG.getConstant(0, DL, VT);
3179
3180   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3181       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3182     // fold (sub_sat c1, c2) -> c3
3183     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3184                                       N1.getNode());
3185   }
3186
3187   // fold (sub_sat x, 0) -> x
3188   if (isNullConstant(N1))
3189     return N0;
3190
3191   return SDValue();
3192 }
3193
3194 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3195   SDValue N0 = N->getOperand(0);
3196   SDValue N1 = N->getOperand(1);
3197   EVT VT = N0.getValueType();
3198   SDLoc DL(N);
3199
3200   // If the flag result is dead, turn this into an SUB.
3201   if (!N->hasAnyUseOfValue(1))
3202     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3203                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3204
3205   // fold (subc x, x) -> 0 + no borrow
3206   if (N0 == N1)
3207     return CombineTo(N, DAG.getConstant(0, DL, VT),
3208                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3209
3210   // fold (subc x, 0) -> x + no borrow
3211   if (isNullConstant(N1))
3212     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3213
3214   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3215   if (isAllOnesConstant(N0))
3216     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3217                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3218
3219   return SDValue();
3220 }
3221
3222 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3223   SDValue N0 = N->getOperand(0);
3224   SDValue N1 = N->getOperand(1);
3225   EVT VT = N0.getValueType();
3226   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3227
3228   EVT CarryVT = N->getValueType(1);
3229   SDLoc DL(N);
3230
3231   // If the flag result is dead, turn this into an SUB.
3232   if (!N->hasAnyUseOfValue(1))
3233     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3234                      DAG.getUNDEF(CarryVT));
3235
3236   // fold (subo x, x) -> 0 + no borrow
3237   if (N0 == N1)
3238     return CombineTo(N, DAG.getConstant(0, DL, VT),
3239                      DAG.getConstant(0, DL, CarryVT));
3240
3241   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3242
3243   // fold (subox, c) -> (addo x, -c)
3244   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3245     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3246                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3247   }
3248
3249   // fold (subo x, 0) -> x + no borrow
3250   if (isNullOrNullSplat(N1))
3251     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3252
3253   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3254   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3255     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3256                      DAG.getConstant(0, DL, CarryVT));
3257
3258   return SDValue();
3259 }
3260
3261 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3262   SDValue N0 = N->getOperand(0);
3263   SDValue N1 = N->getOperand(1);
3264   SDValue CarryIn = N->getOperand(2);
3265
3266   // fold (sube x, y, false) -> (subc x, y)
3267   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3268     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3269
3270   return SDValue();
3271 }
3272
3273 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3274   SDValue N0 = N->getOperand(0);
3275   SDValue N1 = N->getOperand(1);
3276   SDValue CarryIn = N->getOperand(2);
3277
3278   // fold (subcarry x, y, false) -> (usubo x, y)
3279   if (isNullConstant(CarryIn)) {
3280     if (!LegalOperations ||
3281         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3282       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3283   }
3284
3285   return SDValue();
3286 }
3287
3288 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3289 // UMULFIXSAT here.
3290 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3291   SDValue N0 = N->getOperand(0);
3292   SDValue N1 = N->getOperand(1);
3293   SDValue Scale = N->getOperand(2);
3294   EVT VT = N0.getValueType();
3295
3296   // fold (mulfix x, undef, scale) -> 0
3297   if (N0.isUndef() || N1.isUndef())
3298     return DAG.getConstant(0, SDLoc(N), VT);
3299
3300   // Canonicalize constant to RHS (vector doesn't have to splat)
3301   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3302      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3303     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3304
3305   // fold (mulfix x, 0, scale) -> 0
3306   if (isNullConstant(N1))
3307     return DAG.getConstant(0, SDLoc(N), VT);
3308
3309   return SDValue();
3310 }
3311
3312 SDValue DAGCombiner::visitMUL(SDNode *N) {
3313   SDValue N0 = N->getOperand(0);
3314   SDValue N1 = N->getOperand(1);
3315   EVT VT = N0.getValueType();
3316
3317   // fold (mul x, undef) -> 0
3318   if (N0.isUndef() || N1.isUndef())
3319     return DAG.getConstant(0, SDLoc(N), VT);
3320
3321   bool N0IsConst = false;
3322   bool N1IsConst = false;
3323   bool N1IsOpaqueConst = false;
3324   bool N0IsOpaqueConst = false;
3325   APInt ConstValue0, ConstValue1;
3326   // fold vector ops
3327   if (VT.isVector()) {
3328     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3329       return FoldedVOp;
3330
3331     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3332     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3333     assert((!N0IsConst ||
3334             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3335            "Splat APInt should be element width");
3336     assert((!N1IsConst ||
3337             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3338            "Splat APInt should be element width");
3339   } else {
3340     N0IsConst = isa<ConstantSDNode>(N0);
3341     if (N0IsConst) {
3342       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3343       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3344     }
3345     N1IsConst = isa<ConstantSDNode>(N1);
3346     if (N1IsConst) {
3347       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3348       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3349     }
3350   }
3351
3352   // fold (mul c1, c2) -> c1*c2
3353   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3354     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3355                                       N0.getNode(), N1.getNode());
3356
3357   // canonicalize constant to RHS (vector doesn't have to splat)
3358   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3359      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3360     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3361   // fold (mul x, 0) -> 0
3362   if (N1IsConst && ConstValue1.isNullValue())
3363     return N1;
3364   // fold (mul x, 1) -> x
3365   if (N1IsConst && ConstValue1.isOneValue())
3366     return N0;
3367
3368   if (SDValue NewSel = foldBinOpIntoSelect(N))
3369     return NewSel;
3370
3371   // fold (mul x, -1) -> 0-x
3372   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3373     SDLoc DL(N);
3374     return DAG.getNode(ISD::SUB, DL, VT,
3375                        DAG.getConstant(0, DL, VT), N0);
3376   }
3377   // fold (mul x, (1 << c)) -> x << c
3378   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3379       DAG.isKnownToBeAPowerOfTwo(N1) &&
3380       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3381     SDLoc DL(N);
3382     SDValue LogBase2 = BuildLogBase2(N1, DL);
3383     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3384     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3385     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3386   }
3387   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3388   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3389     unsigned Log2Val = (-ConstValue1).logBase2();
3390     SDLoc DL(N);
3391     // FIXME: If the input is something that is easily negated (e.g. a
3392     // single-use add), we should put the negate there.
3393     return DAG.getNode(ISD::SUB, DL, VT,
3394                        DAG.getConstant(0, DL, VT),
3395                        DAG.getNode(ISD::SHL, DL, VT, N0,
3396                             DAG.getConstant(Log2Val, DL,
3397                                       getShiftAmountTy(N0.getValueType()))));
3398   }
3399
3400   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3401   // mul x, (2^N + 1) --> add (shl x, N), x
3402   // mul x, (2^N - 1) --> sub (shl x, N), x
3403   // Examples: x * 33 --> (x << 5) + x
3404   //           x * 15 --> (x << 4) - x
3405   //           x * -33 --> -((x << 5) + x)
3406   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3407   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3408     // TODO: We could handle more general decomposition of any constant by
3409     //       having the target set a limit on number of ops and making a
3410     //       callback to determine that sequence (similar to sqrt expansion).
3411     unsigned MathOp = ISD::DELETED_NODE;
3412     APInt MulC = ConstValue1.abs();
3413     if ((MulC - 1).isPowerOf2())
3414       MathOp = ISD::ADD;
3415     else if ((MulC + 1).isPowerOf2())
3416       MathOp = ISD::SUB;
3417
3418     if (MathOp != ISD::DELETED_NODE) {
3419       unsigned ShAmt =
3420           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3421       assert(ShAmt < VT.getScalarSizeInBits() &&
3422              "multiply-by-constant generated out of bounds shift");
3423       SDLoc DL(N);
3424       SDValue Shl =
3425           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3426       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3427       if (ConstValue1.isNegative())
3428         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3429       return R;
3430     }
3431   }
3432
3433   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3434   if (N0.getOpcode() == ISD::SHL &&
3435       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3436       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3437     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3438     if (isConstantOrConstantVector(C3))
3439       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3440   }
3441
3442   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3443   // use.
3444   {
3445     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3446
3447     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3448     if (N0.getOpcode() == ISD::SHL &&
3449         isConstantOrConstantVector(N0.getOperand(1)) &&
3450         N0.getNode()->hasOneUse()) {
3451       Sh = N0; Y = N1;
3452     } else if (N1.getOpcode() == ISD::SHL &&
3453                isConstantOrConstantVector(N1.getOperand(1)) &&
3454                N1.getNode()->hasOneUse()) {
3455       Sh = N1; Y = N0;
3456     }
3457
3458     if (Sh.getNode()) {
3459       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3460       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3461     }
3462   }
3463
3464   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3465   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3466       N0.getOpcode() == ISD::ADD &&
3467       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3468       isMulAddWithConstProfitable(N, N0, N1))
3469       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3470                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3471                                      N0.getOperand(0), N1),
3472                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3473                                      N0.getOperand(1), N1));
3474
3475   // reassociate mul
3476   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3477     return RMUL;
3478
3479   return SDValue();
3480 }
3481
3482 /// Return true if divmod libcall is available.
3483 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3484                                      const TargetLowering &TLI) {
3485   RTLIB::Libcall LC;
3486   EVT NodeType = Node->getValueType(0);
3487   if (!NodeType.isSimple())
3488     return false;
3489   switch (NodeType.getSimpleVT().SimpleTy) {
3490   default: return false; // No libcall for vector types.
3491   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3492   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3493   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3494   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3495   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3496   }
3497
3498   return TLI.getLibcallName(LC) != nullptr;
3499 }
3500
3501 /// Issue divrem if both quotient and remainder are needed.
3502 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3503   if (Node->use_empty())
3504     return SDValue(); // This is a dead node, leave it alone.
3505
3506   unsigned Opcode = Node->getOpcode();
3507   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3508   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3509
3510   // DivMod lib calls can still work on non-legal types if using lib-calls.
3511   EVT VT = Node->getValueType(0);
3512   if (VT.isVector() || !VT.isInteger())
3513     return SDValue();
3514
3515   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3516     return SDValue();
3517
3518   // If DIVREM is going to get expanded into a libcall,
3519   // but there is no libcall available, then don't combine.
3520   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3521       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3522     return SDValue();
3523
3524   // If div is legal, it's better to do the normal expansion
3525   unsigned OtherOpcode = 0;
3526   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3527     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3528     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3529       return SDValue();
3530   } else {
3531     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3532     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3533       return SDValue();
3534   }
3535
3536   SDValue Op0 = Node->getOperand(0);
3537   SDValue Op1 = Node->getOperand(1);
3538   SDValue combined;
3539   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3540          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3541     SDNode *User = *UI;
3542     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3543         User->use_empty())
3544       continue;
3545     // Convert the other matching node(s), too;
3546     // otherwise, the DIVREM may get target-legalized into something
3547     // target-specific that we won't be able to recognize.
3548     unsigned UserOpc = User->getOpcode();
3549     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3550         User->getOperand(0) == Op0 &&
3551         User->getOperand(1) == Op1) {
3552       if (!combined) {
3553         if (UserOpc == OtherOpcode) {
3554           SDVTList VTs = DAG.getVTList(VT, VT);
3555           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3556         } else if (UserOpc == DivRemOpc) {
3557           combined = SDValue(User, 0);
3558         } else {
3559           assert(UserOpc == Opcode);
3560           continue;
3561         }
3562       }
3563       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3564         CombineTo(User, combined);
3565       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3566         CombineTo(User, combined.getValue(1));
3567     }
3568   }
3569   return combined;
3570 }
3571
3572 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3573   SDValue N0 = N->getOperand(0);
3574   SDValue N1 = N->getOperand(1);
3575   EVT VT = N->getValueType(0);
3576   SDLoc DL(N);
3577
3578   unsigned Opc = N->getOpcode();
3579   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3580   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3581
3582   // X / undef -> undef
3583   // X % undef -> undef
3584   // X / 0 -> undef
3585   // X % 0 -> undef
3586   // NOTE: This includes vectors where any divisor element is zero/undef.
3587   if (DAG.isUndef(Opc, {N0, N1}))
3588     return DAG.getUNDEF(VT);
3589
3590   // undef / X -> 0
3591   // undef % X -> 0
3592   if (N0.isUndef())
3593     return DAG.getConstant(0, DL, VT);
3594
3595   // 0 / X -> 0
3596   // 0 % X -> 0
3597   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3598   if (N0C && N0C->isNullValue())
3599     return N0;
3600
3601   // X / X -> 1
3602   // X % X -> 0
3603   if (N0 == N1)
3604     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3605
3606   // X / 1 -> X
3607   // X % 1 -> 0
3608   // If this is a boolean op (single-bit element type), we can't have
3609   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3610   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3611   // it's a 1.
3612   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3613     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3614
3615   return SDValue();
3616 }
3617
3618 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3619   SDValue N0 = N->getOperand(0);
3620   SDValue N1 = N->getOperand(1);
3621   EVT VT = N->getValueType(0);
3622   EVT CCVT = getSetCCResultType(VT);
3623
3624   // fold vector ops
3625   if (VT.isVector())
3626     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3627       return FoldedVOp;
3628
3629   SDLoc DL(N);
3630
3631   // fold (sdiv c1, c2) -> c1/c2
3632   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3633   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3634   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3635     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3636   // fold (sdiv X, -1) -> 0-X
3637   if (N1C && N1C->isAllOnesValue())
3638     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3639   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3640   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3641     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3642                          DAG.getConstant(1, DL, VT),
3643                          DAG.getConstant(0, DL, VT));
3644
3645   if (SDValue V = simplifyDivRem(N, DAG))
3646     return V;
3647
3648   if (SDValue NewSel = foldBinOpIntoSelect(N))
3649     return NewSel;
3650
3651   // If we know the sign bits of both operands are zero, strength reduce to a
3652   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3653   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3654     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3655
3656   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3657     // If the corresponding remainder node exists, update its users with
3658     // (Dividend - (Quotient * Divisor).
3659     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3660                                               { N0, N1 })) {
3661       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3662       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3663       AddToWorklist(Mul.getNode());
3664       AddToWorklist(Sub.getNode());
3665       CombineTo(RemNode, Sub);
3666     }
3667     return V;
3668   }
3669
3670   // sdiv, srem -> sdivrem
3671   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3672   // true.  Otherwise, we break the simplification logic in visitREM().
3673   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3674   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3675     if (SDValue DivRem = useDivRem(N))
3676         return DivRem;
3677
3678   return SDValue();
3679 }
3680
3681 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3682   SDLoc DL(N);
3683   EVT VT = N->getValueType(0);
3684   EVT CCVT = getSetCCResultType(VT);
3685   unsigned BitWidth = VT.getScalarSizeInBits();
3686
3687   // Helper for determining whether a value is a power-2 constant scalar or a
3688   // vector of such elements.
3689   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3690     if (C->isNullValue() || C->isOpaque())
3691       return false;
3692     if (C->getAPIntValue().isPowerOf2())
3693       return true;
3694     if ((-C->getAPIntValue()).isPowerOf2())
3695       return true;
3696     return false;
3697   };
3698
3699   // fold (sdiv X, pow2) -> simple ops after legalize
3700   // FIXME: We check for the exact bit here because the generic lowering gives
3701   // better results in that case. The target-specific lowering should learn how
3702   // to handle exact sdivs efficiently.
3703   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3704     // Target-specific implementation of sdiv x, pow2.
3705     if (SDValue Res = BuildSDIVPow2(N))
3706       return Res;
3707
3708     // Create constants that are functions of the shift amount value.
3709     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3710     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3711     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3712     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3713     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3714     if (!isConstantOrConstantVector(Inexact))
3715       return SDValue();
3716
3717     // Splat the sign bit into the register
3718     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3719                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3720     AddToWorklist(Sign.getNode());
3721
3722     // Add (N0 < 0) ? abs2 - 1 : 0;
3723     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3724     AddToWorklist(Srl.getNode());
3725     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3726     AddToWorklist(Add.getNode());
3727     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3728     AddToWorklist(Sra.getNode());
3729
3730     // Special case: (sdiv X, 1) -> X
3731     // Special Case: (sdiv X, -1) -> 0-X
3732     SDValue One = DAG.getConstant(1, DL, VT);
3733     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3734     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3735     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3736     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3737     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3738
3739     // If dividing by a positive value, we're done. Otherwise, the result must
3740     // be negated.
3741     SDValue Zero = DAG.getConstant(0, DL, VT);
3742     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3743
3744     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3745     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3746     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3747     return Res;
3748   }
3749
3750   // If integer divide is expensive and we satisfy the requirements, emit an
3751   // alternate sequence.  Targets may check function attributes for size/speed
3752   // trade-offs.
3753   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3754   if (isConstantOrConstantVector(N1) &&
3755       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3756     if (SDValue Op = BuildSDIV(N))
3757       return Op;
3758
3759   return SDValue();
3760 }
3761
3762 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3763   SDValue N0 = N->getOperand(0);
3764   SDValue N1 = N->getOperand(1);
3765   EVT VT = N->getValueType(0);
3766   EVT CCVT = getSetCCResultType(VT);
3767
3768   // fold vector ops
3769   if (VT.isVector())
3770     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3771       return FoldedVOp;
3772
3773   SDLoc DL(N);
3774
3775   // fold (udiv c1, c2) -> c1/c2
3776   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3777   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3778   if (N0C && N1C)
3779     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3780                                                     N0C, N1C))
3781       return Folded;
3782   // fold (udiv X, -1) -> select(X == -1, 1, 0)
3783   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3784     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3785                          DAG.getConstant(1, DL, VT),
3786                          DAG.getConstant(0, DL, VT));
3787
3788   if (SDValue V = simplifyDivRem(N, DAG))
3789     return V;
3790
3791   if (SDValue NewSel = foldBinOpIntoSelect(N))
3792     return NewSel;
3793
3794   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3795     // If the corresponding remainder node exists, update its users with
3796     // (Dividend - (Quotient * Divisor).
3797     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3798                                               { N0, N1 })) {
3799       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3800       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3801       AddToWorklist(Mul.getNode());
3802       AddToWorklist(Sub.getNode());
3803       CombineTo(RemNode, Sub);
3804     }
3805     return V;
3806   }
3807
3808   // sdiv, srem -> sdivrem
3809   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3810   // true.  Otherwise, we break the simplification logic in visitREM().
3811   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3812   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3813     if (SDValue DivRem = useDivRem(N))
3814         return DivRem;
3815
3816   return SDValue();
3817 }
3818
3819 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3820   SDLoc DL(N);
3821   EVT VT = N->getValueType(0);
3822
3823   // fold (udiv x, (1 << c)) -> x >>u c
3824   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3825       DAG.isKnownToBeAPowerOfTwo(N1)) {
3826     SDValue LogBase2 = BuildLogBase2(N1, DL);
3827     AddToWorklist(LogBase2.getNode());
3828
3829     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3830     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3831     AddToWorklist(Trunc.getNode());
3832     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3833   }
3834
3835   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3836   if (N1.getOpcode() == ISD::SHL) {
3837     SDValue N10 = N1.getOperand(0);
3838     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3839         DAG.isKnownToBeAPowerOfTwo(N10)) {
3840       SDValue LogBase2 = BuildLogBase2(N10, DL);
3841       AddToWorklist(LogBase2.getNode());
3842
3843       EVT ADDVT = N1.getOperand(1).getValueType();
3844       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3845       AddToWorklist(Trunc.getNode());
3846       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3847       AddToWorklist(Add.getNode());
3848       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3849     }
3850   }
3851
3852   // fold (udiv x, c) -> alternate
3853   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3854   if (isConstantOrConstantVector(N1) &&
3855       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3856     if (SDValue Op = BuildUDIV(N))
3857       return Op;
3858
3859   return SDValue();
3860 }
3861
3862 // handles ISD::SREM and ISD::UREM
3863 SDValue DAGCombiner::visitREM(SDNode *N) {
3864   unsigned Opcode = N->getOpcode();
3865   SDValue N0 = N->getOperand(0);
3866   SDValue N1 = N->getOperand(1);
3867   EVT VT = N->getValueType(0);
3868   EVT CCVT = getSetCCResultType(VT);
3869
3870   bool isSigned = (Opcode == ISD::SREM);
3871   SDLoc DL(N);
3872
3873   // fold (rem c1, c2) -> c1%c2
3874   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3875   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3876   if (N0C && N1C)
3877     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3878       return Folded;
3879   // fold (urem X, -1) -> select(X == -1, 0, x)
3880   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3881     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3882                          DAG.getConstant(0, DL, VT), N0);
3883
3884   if (SDValue V = simplifyDivRem(N, DAG))
3885     return V;
3886
3887   if (SDValue NewSel = foldBinOpIntoSelect(N))
3888     return NewSel;
3889
3890   if (isSigned) {
3891     // If we know the sign bits of both operands are zero, strength reduce to a
3892     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3893     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3894       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3895   } else {
3896     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3897     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3898       // fold (urem x, pow2) -> (and x, pow2-1)
3899       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3900       AddToWorklist(Add.getNode());
3901       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3902     }
3903     if (N1.getOpcode() == ISD::SHL &&
3904         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3905       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3906       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3907       AddToWorklist(Add.getNode());
3908       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3909     }
3910   }
3911
3912   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3913
3914   // If X/C can be simplified by the division-by-constant logic, lower
3915   // X%C to the equivalent of X-X/C*C.
3916   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3917   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3918   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3919   // combine will not return a DIVREM.  Regardless, checking cheapness here
3920   // makes sense since the simplification results in fatter code.
3921   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3922     SDValue OptimizedDiv =
3923         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3924     if (OptimizedDiv.getNode()) {
3925       // If the equivalent Div node also exists, update its users.
3926       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3927       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3928                                                 { N0, N1 }))
3929         CombineTo(DivNode, OptimizedDiv);
3930       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3931       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3932       AddToWorklist(OptimizedDiv.getNode());
3933       AddToWorklist(Mul.getNode());
3934       return Sub;
3935     }
3936   }
3937
3938   // sdiv, srem -> sdivrem
3939   if (SDValue DivRem = useDivRem(N))
3940     return DivRem.getValue(1);
3941
3942   return SDValue();
3943 }
3944
3945 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3946   SDValue N0 = N->getOperand(0);
3947   SDValue N1 = N->getOperand(1);
3948   EVT VT = N->getValueType(0);
3949   SDLoc DL(N);
3950
3951   if (VT.isVector()) {
3952     // fold (mulhs x, 0) -> 0
3953     // do not return N0/N1, because undef node may exist.
3954     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
3955         ISD::isBuildVectorAllZeros(N1.getNode()))
3956       return DAG.getConstant(0, DL, VT);
3957   }
3958
3959   // fold (mulhs x, 0) -> 0
3960   if (isNullConstant(N1))
3961     return N1;
3962   // fold (mulhs x, 1) -> (sra x, size(x)-1)
3963   if (isOneConstant(N1))
3964     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3965                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
3966                                        getShiftAmountTy(N0.getValueType())));
3967
3968   // fold (mulhs x, undef) -> 0
3969   if (N0.isUndef() || N1.isUndef())
3970     return DAG.getConstant(0, DL, VT);
3971
3972   // If the type twice as wide is legal, transform the mulhs to a wider multiply
3973   // plus a shift.
3974   if (VT.isSimple() && !VT.isVector()) {
3975     MVT Simple = VT.getSimpleVT();
3976     unsigned SimpleSize = Simple.getSizeInBits();
3977     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3978     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3979       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3980       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3981       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3982       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3983             DAG.getConstant(SimpleSize, DL,
3984                             getShiftAmountTy(N1.getValueType())));
3985       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3986     }
3987   }
3988
3989   return SDValue();
3990 }
3991
3992 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3993   SDValue N0 = N->getOperand(0);
3994   SDValue N1 = N->getOperand(1);
3995   EVT VT = N->getValueType(0);
3996   SDLoc DL(N);
3997
3998   if (VT.isVector()) {
3999     // fold (mulhu x, 0) -> 0
4000     // do not return N0/N1, because undef node may exist.
4001     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4002         ISD::isBuildVectorAllZeros(N1.getNode()))
4003       return DAG.getConstant(0, DL, VT);
4004   }
4005
4006   // fold (mulhu x, 0) -> 0
4007   if (isNullConstant(N1))
4008     return N1;
4009   // fold (mulhu x, 1) -> 0
4010   if (isOneConstant(N1))
4011     return DAG.getConstant(0, DL, N0.getValueType());
4012   // fold (mulhu x, undef) -> 0
4013   if (N0.isUndef() || N1.isUndef())
4014     return DAG.getConstant(0, DL, VT);
4015
4016   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4017   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4018       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4019     unsigned NumEltBits = VT.getScalarSizeInBits();
4020     SDValue LogBase2 = BuildLogBase2(N1, DL);
4021     SDValue SRLAmt = DAG.getNode(
4022         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4023     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4024     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4025     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4026   }
4027
4028   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4029   // plus a shift.
4030   if (VT.isSimple() && !VT.isVector()) {
4031     MVT Simple = VT.getSimpleVT();
4032     unsigned SimpleSize = Simple.getSizeInBits();
4033     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4034     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4035       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4036       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4037       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4038       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4039             DAG.getConstant(SimpleSize, DL,
4040                             getShiftAmountTy(N1.getValueType())));
4041       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4042     }
4043   }
4044
4045   return SDValue();
4046 }
4047
4048 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4049 /// give the opcodes for the two computations that are being performed. Return
4050 /// true if a simplification was made.
4051 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4052                                                 unsigned HiOp) {
4053   // If the high half is not needed, just compute the low half.
4054   bool HiExists = N->hasAnyUseOfValue(1);
4055   if (!HiExists && (!LegalOperations ||
4056                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4057     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4058     return CombineTo(N, Res, Res);
4059   }
4060
4061   // If the low half is not needed, just compute the high half.
4062   bool LoExists = N->hasAnyUseOfValue(0);
4063   if (!LoExists && (!LegalOperations ||
4064                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4065     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4066     return CombineTo(N, Res, Res);
4067   }
4068
4069   // If both halves are used, return as it is.
4070   if (LoExists && HiExists)
4071     return SDValue();
4072
4073   // If the two computed results can be simplified separately, separate them.
4074   if (LoExists) {
4075     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4076     AddToWorklist(Lo.getNode());
4077     SDValue LoOpt = combine(Lo.getNode());
4078     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4079         (!LegalOperations ||
4080          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4081       return CombineTo(N, LoOpt, LoOpt);
4082   }
4083
4084   if (HiExists) {
4085     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4086     AddToWorklist(Hi.getNode());
4087     SDValue HiOpt = combine(Hi.getNode());
4088     if (HiOpt.getNode() && HiOpt != Hi &&
4089         (!LegalOperations ||
4090          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4091       return CombineTo(N, HiOpt, HiOpt);
4092   }
4093
4094   return SDValue();
4095 }
4096
4097 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4098   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4099     return Res;
4100
4101   EVT VT = N->getValueType(0);
4102   SDLoc DL(N);
4103
4104   // If the type is twice as wide is legal, transform the mulhu to a wider
4105   // multiply plus a shift.
4106   if (VT.isSimple() && !VT.isVector()) {
4107     MVT Simple = VT.getSimpleVT();
4108     unsigned SimpleSize = Simple.getSizeInBits();
4109     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4110     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4111       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4112       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4113       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4114       // Compute the high part as N1.
4115       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4116             DAG.getConstant(SimpleSize, DL,
4117                             getShiftAmountTy(Lo.getValueType())));
4118       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4119       // Compute the low part as N0.
4120       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4121       return CombineTo(N, Lo, Hi);
4122     }
4123   }
4124
4125   return SDValue();
4126 }
4127
4128 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4129   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4130     return Res;
4131
4132   EVT VT = N->getValueType(0);
4133   SDLoc DL(N);
4134
4135   // (umul_lohi N0, 0) -> (0, 0)
4136   if (isNullConstant(N->getOperand(1))) {
4137     SDValue Zero = DAG.getConstant(0, DL, VT);
4138     return CombineTo(N, Zero, Zero);
4139   }
4140
4141   // (umul_lohi N0, 1) -> (N0, 0)
4142   if (isOneConstant(N->getOperand(1))) {
4143     SDValue Zero = DAG.getConstant(0, DL, VT);
4144     return CombineTo(N, N->getOperand(0), Zero);
4145   }
4146
4147   // If the type is twice as wide is legal, transform the mulhu to a wider
4148   // multiply plus a shift.
4149   if (VT.isSimple() && !VT.isVector()) {
4150     MVT Simple = VT.getSimpleVT();
4151     unsigned SimpleSize = Simple.getSizeInBits();
4152     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4153     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4154       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4155       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4156       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4157       // Compute the high part as N1.
4158       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4159             DAG.getConstant(SimpleSize, DL,
4160                             getShiftAmountTy(Lo.getValueType())));
4161       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4162       // Compute the low part as N0.
4163       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4164       return CombineTo(N, Lo, Hi);
4165     }
4166   }
4167
4168   return SDValue();
4169 }
4170
4171 SDValue DAGCombiner::visitMULO(SDNode *N) {
4172   SDValue N0 = N->getOperand(0);
4173   SDValue N1 = N->getOperand(1);
4174   EVT VT = N0.getValueType();
4175   bool IsSigned = (ISD::SMULO == N->getOpcode());
4176
4177   EVT CarryVT = N->getValueType(1);
4178   SDLoc DL(N);
4179
4180   // canonicalize constant to RHS.
4181   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4182       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4183     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4184
4185   // fold (mulo x, 0) -> 0 + no carry out
4186   if (isNullOrNullSplat(N1))
4187     return CombineTo(N, DAG.getConstant(0, DL, VT),
4188                      DAG.getConstant(0, DL, CarryVT));
4189
4190   // (mulo x, 2) -> (addo x, x)
4191   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4192     if (C2->getAPIntValue() == 2)
4193       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4194                          N->getVTList(), N0, N0);
4195
4196   return SDValue();
4197 }
4198
4199 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4200   SDValue N0 = N->getOperand(0);
4201   SDValue N1 = N->getOperand(1);
4202   EVT VT = N0.getValueType();
4203
4204   // fold vector ops
4205   if (VT.isVector())
4206     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4207       return FoldedVOp;
4208
4209   // fold operation with constant operands.
4210   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4211   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4212   if (N0C && N1C)
4213     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4214
4215   // canonicalize constant to RHS
4216   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4217      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4218     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4219
4220   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4221   // Only do this if the current op isn't legal and the flipped is.
4222   unsigned Opcode = N->getOpcode();
4223   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4224   if (!TLI.isOperationLegal(Opcode, VT) &&
4225       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4226       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4227     unsigned AltOpcode;
4228     switch (Opcode) {
4229     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4230     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4231     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4232     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4233     default: llvm_unreachable("Unknown MINMAX opcode");
4234     }
4235     if (TLI.isOperationLegal(AltOpcode, VT))
4236       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4237   }
4238
4239   return SDValue();
4240 }
4241
4242 /// If this is a bitwise logic instruction and both operands have the same
4243 /// opcode, try to sink the other opcode after the logic instruction.
4244 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4245   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4246   EVT VT = N0.getValueType();
4247   unsigned LogicOpcode = N->getOpcode();
4248   unsigned HandOpcode = N0.getOpcode();
4249   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4250           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4251   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4252
4253   // Bail early if none of these transforms apply.
4254   if (N0.getNumOperands() == 0)
4255     return SDValue();
4256
4257   // FIXME: We should check number of uses of the operands to not increase
4258   //        the instruction count for all transforms.
4259
4260   // Handle size-changing casts.
4261   SDValue X = N0.getOperand(0);
4262   SDValue Y = N1.getOperand(0);
4263   EVT XVT = X.getValueType();
4264   SDLoc DL(N);
4265   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4266       HandOpcode == ISD::SIGN_EXTEND) {
4267     // If both operands have other uses, this transform would create extra
4268     // instructions without eliminating anything.
4269     if (!N0.hasOneUse() && !N1.hasOneUse())
4270       return SDValue();
4271     // We need matching integer source types.
4272     if (XVT != Y.getValueType())
4273       return SDValue();
4274     // Don't create an illegal op during or after legalization. Don't ever
4275     // create an unsupported vector op.
4276     if ((VT.isVector() || LegalOperations) &&
4277         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4278       return SDValue();
4279     // Avoid infinite looping with PromoteIntBinOp.
4280     // TODO: Should we apply desirable/legal constraints to all opcodes?
4281     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4282         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4283       return SDValue();
4284     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4285     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4286     return DAG.getNode(HandOpcode, DL, VT, Logic);
4287   }
4288
4289   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4290   if (HandOpcode == ISD::TRUNCATE) {
4291     // If both operands have other uses, this transform would create extra
4292     // instructions without eliminating anything.
4293     if (!N0.hasOneUse() && !N1.hasOneUse())
4294       return SDValue();
4295     // We need matching source types.
4296     if (XVT != Y.getValueType())
4297       return SDValue();
4298     // Don't create an illegal op during or after legalization.
4299     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4300       return SDValue();
4301     // Be extra careful sinking truncate. If it's free, there's no benefit in
4302     // widening a binop. Also, don't create a logic op on an illegal type.
4303     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4304       return SDValue();
4305     if (!TLI.isTypeLegal(XVT))
4306       return SDValue();
4307     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4308     return DAG.getNode(HandOpcode, DL, VT, Logic);
4309   }
4310
4311   // For binops SHL/SRL/SRA/AND:
4312   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4313   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4314        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4315       N0.getOperand(1) == N1.getOperand(1)) {
4316     // If either operand has other uses, this transform is not an improvement.
4317     if (!N0.hasOneUse() || !N1.hasOneUse())
4318       return SDValue();
4319     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4320     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4321   }
4322
4323   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4324   if (HandOpcode == ISD::BSWAP) {
4325     // If either operand has other uses, this transform is not an improvement.
4326     if (!N0.hasOneUse() || !N1.hasOneUse())
4327       return SDValue();
4328     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4329     return DAG.getNode(HandOpcode, DL, VT, Logic);
4330   }
4331
4332   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4333   // Only perform this optimization up until type legalization, before
4334   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4335   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4336   // we don't want to undo this promotion.
4337   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4338   // on scalars.
4339   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4340        Level <= AfterLegalizeTypes) {
4341     // Input types must be integer and the same.
4342     if (XVT.isInteger() && XVT == Y.getValueType()) {
4343       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4344       return DAG.getNode(HandOpcode, DL, VT, Logic);
4345     }
4346   }
4347
4348   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4349   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4350   // If both shuffles use the same mask, and both shuffle within a single
4351   // vector, then it is worthwhile to move the swizzle after the operation.
4352   // The type-legalizer generates this pattern when loading illegal
4353   // vector types from memory. In many cases this allows additional shuffle
4354   // optimizations.
4355   // There are other cases where moving the shuffle after the xor/and/or
4356   // is profitable even if shuffles don't perform a swizzle.
4357   // If both shuffles use the same mask, and both shuffles have the same first
4358   // or second operand, then it might still be profitable to move the shuffle
4359   // after the xor/and/or operation.
4360   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4361     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4362     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4363     assert(X.getValueType() == Y.getValueType() &&
4364            "Inputs to shuffles are not the same type");
4365
4366     // Check that both shuffles use the same mask. The masks are known to be of
4367     // the same length because the result vector type is the same.
4368     // Check also that shuffles have only one use to avoid introducing extra
4369     // instructions.
4370     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4371         !SVN0->getMask().equals(SVN1->getMask()))
4372       return SDValue();
4373
4374     // Don't try to fold this node if it requires introducing a
4375     // build vector of all zeros that might be illegal at this stage.
4376     SDValue ShOp = N0.getOperand(1);
4377     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4378       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4379
4380     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4381     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4382       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4383                                   N0.getOperand(0), N1.getOperand(0));
4384       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4385     }
4386
4387     // Don't try to fold this node if it requires introducing a
4388     // build vector of all zeros that might be illegal at this stage.
4389     ShOp = N0.getOperand(0);
4390     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4391       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4392
4393     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4394     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4395       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4396                                   N1.getOperand(1));
4397       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4398     }
4399   }
4400
4401   return SDValue();
4402 }
4403
4404 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4405 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4406                                        const SDLoc &DL) {
4407   SDValue LL, LR, RL, RR, N0CC, N1CC;
4408   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4409       !isSetCCEquivalent(N1, RL, RR, N1CC))
4410     return SDValue();
4411
4412   assert(N0.getValueType() == N1.getValueType() &&
4413          "Unexpected operand types for bitwise logic op");
4414   assert(LL.getValueType() == LR.getValueType() &&
4415          RL.getValueType() == RR.getValueType() &&
4416          "Unexpected operand types for setcc");
4417
4418   // If we're here post-legalization or the logic op type is not i1, the logic
4419   // op type must match a setcc result type. Also, all folds require new
4420   // operations on the left and right operands, so those types must match.
4421   EVT VT = N0.getValueType();
4422   EVT OpVT = LL.getValueType();
4423   if (LegalOperations || VT.getScalarType() != MVT::i1)
4424     if (VT != getSetCCResultType(OpVT))
4425       return SDValue();
4426   if (OpVT != RL.getValueType())
4427     return SDValue();
4428
4429   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4430   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4431   bool IsInteger = OpVT.isInteger();
4432   if (LR == RR && CC0 == CC1 && IsInteger) {
4433     bool IsZero = isNullOrNullSplat(LR);
4434     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4435
4436     // All bits clear?
4437     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4438     // All sign bits clear?
4439     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4440     // Any bits set?
4441     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4442     // Any sign bits set?
4443     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4444
4445     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4446     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4447     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4448     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4449     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4450       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4451       AddToWorklist(Or.getNode());
4452       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4453     }
4454
4455     // All bits set?
4456     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4457     // All sign bits set?
4458     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4459     // Any bits clear?
4460     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4461     // Any sign bits clear?
4462     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4463
4464     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4465     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4466     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4467     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4468     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4469       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4470       AddToWorklist(And.getNode());
4471       return DAG.getSetCC(DL, VT, And, LR, CC1);
4472     }
4473   }
4474
4475   // TODO: What is the 'or' equivalent of this fold?
4476   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4477   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4478       IsInteger && CC0 == ISD::SETNE &&
4479       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4480        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4481     SDValue One = DAG.getConstant(1, DL, OpVT);
4482     SDValue Two = DAG.getConstant(2, DL, OpVT);
4483     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4484     AddToWorklist(Add.getNode());
4485     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4486   }
4487
4488   // Try more general transforms if the predicates match and the only user of
4489   // the compares is the 'and' or 'or'.
4490   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4491       N0.hasOneUse() && N1.hasOneUse()) {
4492     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4493     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4494     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4495       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4496       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4497       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4498       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4499       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4500     }
4501
4502     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4503     // TODO - support non-uniform vector amounts.
4504     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4505       // Match a shared variable operand and 2 non-opaque constant operands.
4506       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4507       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4508       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4509         // Canonicalize larger constant as C0.
4510         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4511           std::swap(C0, C1);
4512
4513         // The difference of the constants must be a single bit.
4514         const APInt &C0Val = C0->getAPIntValue();
4515         const APInt &C1Val = C1->getAPIntValue();
4516         if ((C0Val - C1Val).isPowerOf2()) {
4517           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4518           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4519           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4520           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4521           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4522           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4523           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4524           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4525         }
4526       }
4527     }
4528   }
4529
4530   // Canonicalize equivalent operands to LL == RL.
4531   if (LL == RR && LR == RL) {
4532     CC1 = ISD::getSetCCSwappedOperands(CC1);
4533     std::swap(RL, RR);
4534   }
4535
4536   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4537   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4538   if (LL == RL && LR == RR) {
4539     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4540                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4541     if (NewCC != ISD::SETCC_INVALID &&
4542         (!LegalOperations ||
4543          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4544           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4545       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4546   }
4547
4548   return SDValue();
4549 }
4550
4551 /// This contains all DAGCombine rules which reduce two values combined by
4552 /// an And operation to a single value. This makes them reusable in the context
4553 /// of visitSELECT(). Rules involving constants are not included as
4554 /// visitSELECT() already handles those cases.
4555 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4556   EVT VT = N1.getValueType();
4557   SDLoc DL(N);
4558
4559   // fold (and x, undef) -> 0
4560   if (N0.isUndef() || N1.isUndef())
4561     return DAG.getConstant(0, DL, VT);
4562
4563   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4564     return V;
4565
4566   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4567       VT.getSizeInBits() <= 64) {
4568     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4569       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4570         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4571         // immediate for an add, but it is legal if its top c2 bits are set,
4572         // transform the ADD so the immediate doesn't need to be materialized
4573         // in a register.
4574         APInt ADDC = ADDI->getAPIntValue();
4575         APInt SRLC = SRLI->getAPIntValue();
4576         if (ADDC.getMinSignedBits() <= 64 &&
4577             SRLC.ult(VT.getSizeInBits()) &&
4578             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4579           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4580                                              SRLC.getZExtValue());
4581           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4582             ADDC |= Mask;
4583             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4584               SDLoc DL0(N0);
4585               SDValue NewAdd =
4586                 DAG.getNode(ISD::ADD, DL0, VT,
4587                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4588               CombineTo(N0.getNode(), NewAdd);
4589               // Return N so it doesn't get rechecked!
4590               return SDValue(N, 0);
4591             }
4592           }
4593         }
4594       }
4595     }
4596   }
4597
4598   // Reduce bit extract of low half of an integer to the narrower type.
4599   // (and (srl i64:x, K), KMask) ->
4600   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4601   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4602     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4603       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4604         unsigned Size = VT.getSizeInBits();
4605         const APInt &AndMask = CAnd->getAPIntValue();
4606         unsigned ShiftBits = CShift->getZExtValue();
4607
4608         // Bail out, this node will probably disappear anyway.
4609         if (ShiftBits == 0)
4610           return SDValue();
4611
4612         unsigned MaskBits = AndMask.countTrailingOnes();
4613         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4614
4615         if (AndMask.isMask() &&
4616             // Required bits must not span the two halves of the integer and
4617             // must fit in the half size type.
4618             (ShiftBits + MaskBits <= Size / 2) &&
4619             TLI.isNarrowingProfitable(VT, HalfVT) &&
4620             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4621             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4622             TLI.isTruncateFree(VT, HalfVT) &&
4623             TLI.isZExtFree(HalfVT, VT)) {
4624           // The isNarrowingProfitable is to avoid regressions on PPC and
4625           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4626           // on downstream users of this. Those patterns could probably be
4627           // extended to handle extensions mixed in.
4628
4629           SDValue SL(N0);
4630           assert(MaskBits <= Size);
4631
4632           // Extracting the highest bit of the low half.
4633           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4634           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4635                                       N0.getOperand(0));
4636
4637           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4638           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4639           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4640           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4641           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4642         }
4643       }
4644     }
4645   }
4646
4647   return SDValue();
4648 }
4649
4650 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4651                                    EVT LoadResultTy, EVT &ExtVT) {
4652   if (!AndC->getAPIntValue().isMask())
4653     return false;
4654
4655   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4656
4657   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4658   EVT LoadedVT = LoadN->getMemoryVT();
4659
4660   if (ExtVT == LoadedVT &&
4661       (!LegalOperations ||
4662        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4663     // ZEXTLOAD will match without needing to change the size of the value being
4664     // loaded.
4665     return true;
4666   }
4667
4668   // Do not change the width of a volatile or atomic loads.
4669   if (!LoadN->isSimple())
4670     return false;
4671
4672   // Do not generate loads of non-round integer types since these can
4673   // be expensive (and would be wrong if the type is not byte sized).
4674   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4675     return false;
4676
4677   if (LegalOperations &&
4678       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4679     return false;
4680
4681   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4682     return false;
4683
4684   return true;
4685 }
4686
4687 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4688                                     ISD::LoadExtType ExtType, EVT &MemVT,
4689                                     unsigned ShAmt) {
4690   if (!LDST)
4691     return false;
4692   // Only allow byte offsets.
4693   if (ShAmt % 8)
4694     return false;
4695
4696   // Do not generate loads of non-round integer types since these can
4697   // be expensive (and would be wrong if the type is not byte sized).
4698   if (!MemVT.isRound())
4699     return false;
4700
4701   // Don't change the width of a volatile or atomic loads.
4702   if (!LDST->isSimple())
4703     return false;
4704
4705   // Verify that we are actually reducing a load width here.
4706   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4707     return false;
4708
4709   // Ensure that this isn't going to produce an unsupported unaligned access.
4710   if (ShAmt &&
4711       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4712                               LDST->getAddressSpace(), ShAmt / 8,
4713                               LDST->getMemOperand()->getFlags()))
4714     return false;
4715
4716   // It's not possible to generate a constant of extended or untyped type.
4717   EVT PtrType = LDST->getBasePtr().getValueType();
4718   if (PtrType == MVT::Untyped || PtrType.isExtended())
4719     return false;
4720
4721   if (isa<LoadSDNode>(LDST)) {
4722     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4723     // Don't transform one with multiple uses, this would require adding a new
4724     // load.
4725     if (!SDValue(Load, 0).hasOneUse())
4726       return false;
4727
4728     if (LegalOperations &&
4729         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4730       return false;
4731
4732     // For the transform to be legal, the load must produce only two values
4733     // (the value loaded and the chain).  Don't transform a pre-increment
4734     // load, for example, which produces an extra value.  Otherwise the
4735     // transformation is not equivalent, and the downstream logic to replace
4736     // uses gets things wrong.
4737     if (Load->getNumValues() > 2)
4738       return false;
4739
4740     // If the load that we're shrinking is an extload and we're not just
4741     // discarding the extension we can't simply shrink the load. Bail.
4742     // TODO: It would be possible to merge the extensions in some cases.
4743     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4744         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4745       return false;
4746
4747     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4748       return false;
4749   } else {
4750     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4751     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4752     // Can't write outside the original store
4753     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4754       return false;
4755
4756     if (LegalOperations &&
4757         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4758       return false;
4759   }
4760   return true;
4761 }
4762
4763 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4764                                     SmallVectorImpl<LoadSDNode*> &Loads,
4765                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4766                                     ConstantSDNode *Mask,
4767                                     SDNode *&NodeToMask) {
4768   // Recursively search for the operands, looking for loads which can be
4769   // narrowed.
4770   for (SDValue Op : N->op_values()) {
4771     if (Op.getValueType().isVector())
4772       return false;
4773
4774     // Some constants may need fixing up later if they are too large.
4775     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4776       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4777           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4778         NodesWithConsts.insert(N);
4779       continue;
4780     }
4781
4782     if (!Op.hasOneUse())
4783       return false;
4784
4785     switch(Op.getOpcode()) {
4786     case ISD::LOAD: {
4787       auto *Load = cast<LoadSDNode>(Op);
4788       EVT ExtVT;
4789       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4790           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4791
4792         // ZEXTLOAD is already small enough.
4793         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4794             ExtVT.bitsGE(Load->getMemoryVT()))
4795           continue;
4796
4797         // Use LE to convert equal sized loads to zext.
4798         if (ExtVT.bitsLE(Load->getMemoryVT()))
4799           Loads.push_back(Load);
4800
4801         continue;
4802       }
4803       return false;
4804     }
4805     case ISD::ZERO_EXTEND:
4806     case ISD::AssertZext: {
4807       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4808       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4809       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4810         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4811         Op.getOperand(0).getValueType();
4812
4813       // We can accept extending nodes if the mask is wider or an equal
4814       // width to the original type.
4815       if (ExtVT.bitsGE(VT))
4816         continue;
4817       break;
4818     }
4819     case ISD::OR:
4820     case ISD::XOR:
4821     case ISD::AND:
4822       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4823                              NodeToMask))
4824         return false;
4825       continue;
4826     }
4827
4828     // Allow one node which will masked along with any loads found.
4829     if (NodeToMask)
4830       return false;
4831
4832     // Also ensure that the node to be masked only produces one data result.
4833     NodeToMask = Op.getNode();
4834     if (NodeToMask->getNumValues() > 1) {
4835       bool HasValue = false;
4836       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4837         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4838         if (VT != MVT::Glue && VT != MVT::Other) {
4839           if (HasValue) {
4840             NodeToMask = nullptr;
4841             return false;
4842           }
4843           HasValue = true;
4844         }
4845       }
4846       assert(HasValue && "Node to be masked has no data result?");
4847     }
4848   }
4849   return true;
4850 }
4851
4852 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4853   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4854   if (!Mask)
4855     return false;
4856
4857   if (!Mask->getAPIntValue().isMask())
4858     return false;
4859
4860   // No need to do anything if the and directly uses a load.
4861   if (isa<LoadSDNode>(N->getOperand(0)))
4862     return false;
4863
4864   SmallVector<LoadSDNode*, 8> Loads;
4865   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4866   SDNode *FixupNode = nullptr;
4867   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4868     if (Loads.size() == 0)
4869       return false;
4870
4871     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4872     SDValue MaskOp = N->getOperand(1);
4873
4874     // If it exists, fixup the single node we allow in the tree that needs
4875     // masking.
4876     if (FixupNode) {
4877       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4878       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4879                                 FixupNode->getValueType(0),
4880                                 SDValue(FixupNode, 0), MaskOp);
4881       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4882       if (And.getOpcode() == ISD ::AND)
4883         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4884     }
4885
4886     // Narrow any constants that need it.
4887     for (auto *LogicN : NodesWithConsts) {
4888       SDValue Op0 = LogicN->getOperand(0);
4889       SDValue Op1 = LogicN->getOperand(1);
4890
4891       if (isa<ConstantSDNode>(Op0))
4892           std::swap(Op0, Op1);
4893
4894       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4895                                 Op1, MaskOp);
4896
4897       DAG.UpdateNodeOperands(LogicN, Op0, And);
4898     }
4899
4900     // Create narrow loads.
4901     for (auto *Load : Loads) {
4902       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4903       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4904                                 SDValue(Load, 0), MaskOp);
4905       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4906       if (And.getOpcode() == ISD ::AND)
4907         And = SDValue(
4908             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4909       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4910       assert(NewLoad &&
4911              "Shouldn't be masking the load if it can't be narrowed");
4912       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4913     }
4914     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4915     return true;
4916   }
4917   return false;
4918 }
4919
4920 // Unfold
4921 //    x &  (-1 'logical shift' y)
4922 // To
4923 //    (x 'opposite logical shift' y) 'logical shift' y
4924 // if it is better for performance.
4925 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4926   assert(N->getOpcode() == ISD::AND);
4927
4928   SDValue N0 = N->getOperand(0);
4929   SDValue N1 = N->getOperand(1);
4930
4931   // Do we actually prefer shifts over mask?
4932   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4933     return SDValue();
4934
4935   // Try to match  (-1 '[outer] logical shift' y)
4936   unsigned OuterShift;
4937   unsigned InnerShift; // The opposite direction to the OuterShift.
4938   SDValue Y;           // Shift amount.
4939   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4940     if (!M.hasOneUse())
4941       return false;
4942     OuterShift = M->getOpcode();
4943     if (OuterShift == ISD::SHL)
4944       InnerShift = ISD::SRL;
4945     else if (OuterShift == ISD::SRL)
4946       InnerShift = ISD::SHL;
4947     else
4948       return false;
4949     if (!isAllOnesConstant(M->getOperand(0)))
4950       return false;
4951     Y = M->getOperand(1);
4952     return true;
4953   };
4954
4955   SDValue X;
4956   if (matchMask(N1))
4957     X = N0;
4958   else if (matchMask(N0))
4959     X = N1;
4960   else
4961     return SDValue();
4962
4963   SDLoc DL(N);
4964   EVT VT = N->getValueType(0);
4965
4966   //     tmp = x   'opposite logical shift' y
4967   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4968   //     ret = tmp 'logical shift' y
4969   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4970
4971   return T1;
4972 }
4973
4974 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
4975 /// For a target with a bit test, this is expected to become test + set and save
4976 /// at least 1 instruction.
4977 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
4978   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
4979
4980   // This is probably not worthwhile without a supported type.
4981   EVT VT = And->getValueType(0);
4982   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4983   if (!TLI.isTypeLegal(VT))
4984     return SDValue();
4985
4986   // Look through an optional extension and find a 'not'.
4987   // TODO: Should we favor test+set even without the 'not' op?
4988   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
4989   if (Not.getOpcode() == ISD::ANY_EXTEND)
4990     Not = Not.getOperand(0);
4991   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
4992     return SDValue();
4993
4994   // Look though an optional truncation. The source operand may not be the same
4995   // type as the original 'and', but that is ok because we are masking off
4996   // everything but the low bit.
4997   SDValue Srl = Not.getOperand(0);
4998   if (Srl.getOpcode() == ISD::TRUNCATE)
4999     Srl = Srl.getOperand(0);
5000
5001   // Match a shift-right by constant.
5002   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5003       !isa<ConstantSDNode>(Srl.getOperand(1)))
5004     return SDValue();
5005
5006   // We might have looked through casts that make this transform invalid.
5007   // TODO: If the source type is wider than the result type, do the mask and
5008   //       compare in the source type.
5009   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5010   unsigned VTBitWidth = VT.getSizeInBits();
5011   if (ShiftAmt.uge(VTBitWidth))
5012     return SDValue();
5013
5014   // Turn this into a bit-test pattern using mask op + setcc:
5015   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5016   SDLoc DL(And);
5017   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5018   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5019   SDValue Mask = DAG.getConstant(
5020       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5021   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5022   SDValue Zero = DAG.getConstant(0, DL, VT);
5023   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5024   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5025 }
5026
5027 SDValue DAGCombiner::visitAND(SDNode *N) {
5028   SDValue N0 = N->getOperand(0);
5029   SDValue N1 = N->getOperand(1);
5030   EVT VT = N1.getValueType();
5031
5032   // x & x --> x
5033   if (N0 == N1)
5034     return N0;
5035
5036   // fold vector ops
5037   if (VT.isVector()) {
5038     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5039       return FoldedVOp;
5040
5041     // fold (and x, 0) -> 0, vector edition
5042     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5043       // do not return N0, because undef node may exist in N0
5044       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5045                              SDLoc(N), N0.getValueType());
5046     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5047       // do not return N1, because undef node may exist in N1
5048       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5049                              SDLoc(N), N1.getValueType());
5050
5051     // fold (and x, -1) -> x, vector edition
5052     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5053       return N1;
5054     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5055       return N0;
5056   }
5057
5058   // fold (and c1, c2) -> c1&c2
5059   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5060   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5061   if (N0C && N1C && !N1C->isOpaque())
5062     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
5063   // canonicalize constant to RHS
5064   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5065       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5066     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5067   // fold (and x, -1) -> x
5068   if (isAllOnesConstant(N1))
5069     return N0;
5070   // if (and x, c) is known to be zero, return 0
5071   unsigned BitWidth = VT.getScalarSizeInBits();
5072   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5073                                    APInt::getAllOnesValue(BitWidth)))
5074     return DAG.getConstant(0, SDLoc(N), VT);
5075
5076   if (SDValue NewSel = foldBinOpIntoSelect(N))
5077     return NewSel;
5078
5079   // reassociate and
5080   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5081     return RAND;
5082
5083   // Try to convert a constant mask AND into a shuffle clear mask.
5084   if (VT.isVector())
5085     if (SDValue Shuffle = XformToShuffleWithZero(N))
5086       return Shuffle;
5087
5088   // fold (and (or x, C), D) -> D if (C & D) == D
5089   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5090     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5091   };
5092   if (N0.getOpcode() == ISD::OR &&
5093       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5094     return N1;
5095   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5096   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5097     SDValue N0Op0 = N0.getOperand(0);
5098     APInt Mask = ~N1C->getAPIntValue();
5099     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5100     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5101       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5102                                  N0.getValueType(), N0Op0);
5103
5104       // Replace uses of the AND with uses of the Zero extend node.
5105       CombineTo(N, Zext);
5106
5107       // We actually want to replace all uses of the any_extend with the
5108       // zero_extend, to avoid duplicating things.  This will later cause this
5109       // AND to be folded.
5110       CombineTo(N0.getNode(), Zext);
5111       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5112     }
5113   }
5114
5115   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5116   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5117   // already be zero by virtue of the width of the base type of the load.
5118   //
5119   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5120   // more cases.
5121   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5122        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5123        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5124        N0.getOperand(0).getResNo() == 0) ||
5125       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5126     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5127                                          N0 : N0.getOperand(0) );
5128
5129     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5130     // This can be a pure constant or a vector splat, in which case we treat the
5131     // vector as a scalar and use the splat value.
5132     APInt Constant = APInt::getNullValue(1);
5133     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5134       Constant = C->getAPIntValue();
5135     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5136       APInt SplatValue, SplatUndef;
5137       unsigned SplatBitSize;
5138       bool HasAnyUndefs;
5139       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5140                                              SplatBitSize, HasAnyUndefs);
5141       if (IsSplat) {
5142         // Undef bits can contribute to a possible optimisation if set, so
5143         // set them.
5144         SplatValue |= SplatUndef;
5145
5146         // The splat value may be something like "0x00FFFFFF", which means 0 for
5147         // the first vector value and FF for the rest, repeating. We need a mask
5148         // that will apply equally to all members of the vector, so AND all the
5149         // lanes of the constant together.
5150         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5151
5152         // If the splat value has been compressed to a bitlength lower
5153         // than the size of the vector lane, we need to re-expand it to
5154         // the lane size.
5155         if (EltBitWidth > SplatBitSize)
5156           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5157                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5158             SplatValue |= SplatValue.shl(SplatBitSize);
5159
5160         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5161         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5162         if ((SplatBitSize % EltBitWidth) == 0) {
5163           Constant = APInt::getAllOnesValue(EltBitWidth);
5164           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5165             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5166         }
5167       }
5168     }
5169
5170     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5171     // actually legal and isn't going to get expanded, else this is a false
5172     // optimisation.
5173     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5174                                                     Load->getValueType(0),
5175                                                     Load->getMemoryVT());
5176
5177     // Resize the constant to the same size as the original memory access before
5178     // extension. If it is still the AllOnesValue then this AND is completely
5179     // unneeded.
5180     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5181
5182     bool B;
5183     switch (Load->getExtensionType()) {
5184     default: B = false; break;
5185     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5186     case ISD::ZEXTLOAD:
5187     case ISD::NON_EXTLOAD: B = true; break;
5188     }
5189
5190     if (B && Constant.isAllOnesValue()) {
5191       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5192       // preserve semantics once we get rid of the AND.
5193       SDValue NewLoad(Load, 0);
5194
5195       // Fold the AND away. NewLoad may get replaced immediately.
5196       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5197
5198       if (Load->getExtensionType() == ISD::EXTLOAD) {
5199         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5200                               Load->getValueType(0), SDLoc(Load),
5201                               Load->getChain(), Load->getBasePtr(),
5202                               Load->getOffset(), Load->getMemoryVT(),
5203                               Load->getMemOperand());
5204         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5205         if (Load->getNumValues() == 3) {
5206           // PRE/POST_INC loads have 3 values.
5207           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5208                            NewLoad.getValue(2) };
5209           CombineTo(Load, To, 3, true);
5210         } else {
5211           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5212         }
5213       }
5214
5215       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5216     }
5217   }
5218
5219   // fold (and (load x), 255) -> (zextload x, i8)
5220   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5221   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5222   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5223                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5224                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5225     if (SDValue Res = ReduceLoadWidth(N)) {
5226       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5227         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5228       AddToWorklist(N);
5229       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5230       return SDValue(N, 0);
5231     }
5232   }
5233
5234   if (Level >= AfterLegalizeTypes) {
5235     // Attempt to propagate the AND back up to the leaves which, if they're
5236     // loads, can be combined to narrow loads and the AND node can be removed.
5237     // Perform after legalization so that extend nodes will already be
5238     // combined into the loads.
5239     if (BackwardsPropagateMask(N, DAG)) {
5240       return SDValue(N, 0);
5241     }
5242   }
5243
5244   if (SDValue Combined = visitANDLike(N0, N1, N))
5245     return Combined;
5246
5247   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5248   if (N0.getOpcode() == N1.getOpcode())
5249     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5250       return V;
5251
5252   // Masking the negated extension of a boolean is just the zero-extended
5253   // boolean:
5254   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5255   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5256   //
5257   // Note: the SimplifyDemandedBits fold below can make an information-losing
5258   // transform, and then we have no way to find this better fold.
5259   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5260     if (isNullOrNullSplat(N0.getOperand(0))) {
5261       SDValue SubRHS = N0.getOperand(1);
5262       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5263           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5264         return SubRHS;
5265       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5266           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5267         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5268     }
5269   }
5270
5271   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5272   // fold (and (sra)) -> (and (srl)) when possible.
5273   if (SimplifyDemandedBits(SDValue(N, 0)))
5274     return SDValue(N, 0);
5275
5276   // fold (zext_inreg (extload x)) -> (zextload x)
5277   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5278   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5279       (ISD::isEXTLoad(N0.getNode()) ||
5280        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5281     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5282     EVT MemVT = LN0->getMemoryVT();
5283     // If we zero all the possible extended bits, then we can turn this into
5284     // a zextload if we are running before legalize or the operation is legal.
5285     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5286     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5287     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5288     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5289         ((!LegalOperations && LN0->isSimple()) ||
5290          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5291       SDValue ExtLoad =
5292           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5293                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5294       AddToWorklist(N);
5295       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5296       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5297     }
5298   }
5299
5300   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5301   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5302     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5303                                            N0.getOperand(1), false))
5304       return BSwap;
5305   }
5306
5307   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5308     return Shifts;
5309
5310   if (TLI.hasBitTest(N0, N1))
5311     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5312       return V;
5313
5314   return SDValue();
5315 }
5316
5317 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5318 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5319                                         bool DemandHighBits) {
5320   if (!LegalOperations)
5321     return SDValue();
5322
5323   EVT VT = N->getValueType(0);
5324   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5325     return SDValue();
5326   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5327     return SDValue();
5328
5329   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5330   bool LookPassAnd0 = false;
5331   bool LookPassAnd1 = false;
5332   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5333       std::swap(N0, N1);
5334   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5335       std::swap(N0, N1);
5336   if (N0.getOpcode() == ISD::AND) {
5337     if (!N0.getNode()->hasOneUse())
5338       return SDValue();
5339     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5340     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5341     // This is needed for X86.
5342     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5343                   N01C->getZExtValue() != 0xFFFF))
5344       return SDValue();
5345     N0 = N0.getOperand(0);
5346     LookPassAnd0 = true;
5347   }
5348
5349   if (N1.getOpcode() == ISD::AND) {
5350     if (!N1.getNode()->hasOneUse())
5351       return SDValue();
5352     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5353     if (!N11C || N11C->getZExtValue() != 0xFF)
5354       return SDValue();
5355     N1 = N1.getOperand(0);
5356     LookPassAnd1 = true;
5357   }
5358
5359   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5360     std::swap(N0, N1);
5361   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5362     return SDValue();
5363   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5364     return SDValue();
5365
5366   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5367   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5368   if (!N01C || !N11C)
5369     return SDValue();
5370   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5371     return SDValue();
5372
5373   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5374   SDValue N00 = N0->getOperand(0);
5375   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5376     if (!N00.getNode()->hasOneUse())
5377       return SDValue();
5378     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5379     if (!N001C || N001C->getZExtValue() != 0xFF)
5380       return SDValue();
5381     N00 = N00.getOperand(0);
5382     LookPassAnd0 = true;
5383   }
5384
5385   SDValue N10 = N1->getOperand(0);
5386   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5387     if (!N10.getNode()->hasOneUse())
5388       return SDValue();
5389     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5390     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5391     // for X86.
5392     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5393                    N101C->getZExtValue() != 0xFFFF))
5394       return SDValue();
5395     N10 = N10.getOperand(0);
5396     LookPassAnd1 = true;
5397   }
5398
5399   if (N00 != N10)
5400     return SDValue();
5401
5402   // Make sure everything beyond the low halfword gets set to zero since the SRL
5403   // 16 will clear the top bits.
5404   unsigned OpSizeInBits = VT.getSizeInBits();
5405   if (DemandHighBits && OpSizeInBits > 16) {
5406     // If the left-shift isn't masked out then the only way this is a bswap is
5407     // if all bits beyond the low 8 are 0. In that case the entire pattern
5408     // reduces to a left shift anyway: leave it for other parts of the combiner.
5409     if (!LookPassAnd0)
5410       return SDValue();
5411
5412     // However, if the right shift isn't masked out then it might be because
5413     // it's not needed. See if we can spot that too.
5414     if (!LookPassAnd1 &&
5415         !DAG.MaskedValueIsZero(
5416             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5417       return SDValue();
5418   }
5419
5420   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5421   if (OpSizeInBits > 16) {
5422     SDLoc DL(N);
5423     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5424                       DAG.getConstant(OpSizeInBits - 16, DL,
5425                                       getShiftAmountTy(VT)));
5426   }
5427   return Res;
5428 }
5429
5430 /// Return true if the specified node is an element that makes up a 32-bit
5431 /// packed halfword byteswap.
5432 /// ((x & 0x000000ff) << 8) |
5433 /// ((x & 0x0000ff00) >> 8) |
5434 /// ((x & 0x00ff0000) << 8) |
5435 /// ((x & 0xff000000) >> 8)
5436 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5437   if (!N.getNode()->hasOneUse())
5438     return false;
5439
5440   unsigned Opc = N.getOpcode();
5441   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5442     return false;
5443
5444   SDValue N0 = N.getOperand(0);
5445   unsigned Opc0 = N0.getOpcode();
5446   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5447     return false;
5448
5449   ConstantSDNode *N1C = nullptr;
5450   // SHL or SRL: look upstream for AND mask operand
5451   if (Opc == ISD::AND)
5452     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5453   else if (Opc0 == ISD::AND)
5454     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5455   if (!N1C)
5456     return false;
5457
5458   unsigned MaskByteOffset;
5459   switch (N1C->getZExtValue()) {
5460   default:
5461     return false;
5462   case 0xFF:       MaskByteOffset = 0; break;
5463   case 0xFF00:     MaskByteOffset = 1; break;
5464   case 0xFFFF:
5465     // In case demanded bits didn't clear the bits that will be shifted out.
5466     // This is needed for X86.
5467     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5468       MaskByteOffset = 1;
5469       break;
5470     }
5471     return false;
5472   case 0xFF0000:   MaskByteOffset = 2; break;
5473   case 0xFF000000: MaskByteOffset = 3; break;
5474   }
5475
5476   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5477   if (Opc == ISD::AND) {
5478     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5479       // (x >> 8) & 0xff
5480       // (x >> 8) & 0xff0000
5481       if (Opc0 != ISD::SRL)
5482         return false;
5483       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5484       if (!C || C->getZExtValue() != 8)
5485         return false;
5486     } else {
5487       // (x << 8) & 0xff00
5488       // (x << 8) & 0xff000000
5489       if (Opc0 != ISD::SHL)
5490         return false;
5491       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5492       if (!C || C->getZExtValue() != 8)
5493         return false;
5494     }
5495   } else if (Opc == ISD::SHL) {
5496     // (x & 0xff) << 8
5497     // (x & 0xff0000) << 8
5498     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5499       return false;
5500     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5501     if (!C || C->getZExtValue() != 8)
5502       return false;
5503   } else { // Opc == ISD::SRL
5504     // (x & 0xff00) >> 8
5505     // (x & 0xff000000) >> 8
5506     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5507       return false;
5508     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5509     if (!C || C->getZExtValue() != 8)
5510       return false;
5511   }
5512
5513   if (Parts[MaskByteOffset])
5514     return false;
5515
5516   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5517   return true;
5518 }
5519
5520 /// Match a 32-bit packed halfword bswap. That is
5521 /// ((x & 0x000000ff) << 8) |
5522 /// ((x & 0x0000ff00) >> 8) |
5523 /// ((x & 0x00ff0000) << 8) |
5524 /// ((x & 0xff000000) >> 8)
5525 /// => (rotl (bswap x), 16)
5526 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5527   if (!LegalOperations)
5528     return SDValue();
5529
5530   EVT VT = N->getValueType(0);
5531   if (VT != MVT::i32)
5532     return SDValue();
5533   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5534     return SDValue();
5535
5536   // Look for either
5537   // (or (or (and), (and)), (or (and), (and)))
5538   // (or (or (or (and), (and)), (and)), (and))
5539   if (N0.getOpcode() != ISD::OR)
5540     return SDValue();
5541   SDValue N00 = N0.getOperand(0);
5542   SDValue N01 = N0.getOperand(1);
5543   SDNode *Parts[4] = {};
5544
5545   if (N1.getOpcode() == ISD::OR &&
5546       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5547     // (or (or (and), (and)), (or (and), (and)))
5548     if (!isBSwapHWordElement(N00, Parts))
5549       return SDValue();
5550
5551     if (!isBSwapHWordElement(N01, Parts))
5552       return SDValue();
5553     SDValue N10 = N1.getOperand(0);
5554     if (!isBSwapHWordElement(N10, Parts))
5555       return SDValue();
5556     SDValue N11 = N1.getOperand(1);
5557     if (!isBSwapHWordElement(N11, Parts))
5558       return SDValue();
5559   } else {
5560     // (or (or (or (and), (and)), (and)), (and))
5561     if (!isBSwapHWordElement(N1, Parts))
5562       return SDValue();
5563     if (!isBSwapHWordElement(N01, Parts))
5564       return SDValue();
5565     if (N00.getOpcode() != ISD::OR)
5566       return SDValue();
5567     SDValue N000 = N00.getOperand(0);
5568     if (!isBSwapHWordElement(N000, Parts))
5569       return SDValue();
5570     SDValue N001 = N00.getOperand(1);
5571     if (!isBSwapHWordElement(N001, Parts))
5572       return SDValue();
5573   }
5574
5575   // Make sure the parts are all coming from the same node.
5576   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5577     return SDValue();
5578
5579   SDLoc DL(N);
5580   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5581                               SDValue(Parts[0], 0));
5582
5583   // Result of the bswap should be rotated by 16. If it's not legal, then
5584   // do  (x << 16) | (x >> 16).
5585   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5586   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5587     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5588   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5589     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5590   return DAG.getNode(ISD::OR, DL, VT,
5591                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5592                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5593 }
5594
5595 /// This contains all DAGCombine rules which reduce two values combined by
5596 /// an Or operation to a single value \see visitANDLike().
5597 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5598   EVT VT = N1.getValueType();
5599   SDLoc DL(N);
5600
5601   // fold (or x, undef) -> -1
5602   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5603     return DAG.getAllOnesConstant(DL, VT);
5604
5605   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5606     return V;
5607
5608   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5609   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5610       // Don't increase # computations.
5611       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5612     // We can only do this xform if we know that bits from X that are set in C2
5613     // but not in C1 are already zero.  Likewise for Y.
5614     if (const ConstantSDNode *N0O1C =
5615         getAsNonOpaqueConstant(N0.getOperand(1))) {
5616       if (const ConstantSDNode *N1O1C =
5617           getAsNonOpaqueConstant(N1.getOperand(1))) {
5618         // We can only do this xform if we know that bits from X that are set in
5619         // C2 but not in C1 are already zero.  Likewise for Y.
5620         const APInt &LHSMask = N0O1C->getAPIntValue();
5621         const APInt &RHSMask = N1O1C->getAPIntValue();
5622
5623         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5624             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5625           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5626                                   N0.getOperand(0), N1.getOperand(0));
5627           return DAG.getNode(ISD::AND, DL, VT, X,
5628                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5629         }
5630       }
5631     }
5632   }
5633
5634   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5635   if (N0.getOpcode() == ISD::AND &&
5636       N1.getOpcode() == ISD::AND &&
5637       N0.getOperand(0) == N1.getOperand(0) &&
5638       // Don't increase # computations.
5639       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5640     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5641                             N0.getOperand(1), N1.getOperand(1));
5642     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5643   }
5644
5645   return SDValue();
5646 }
5647
5648 /// OR combines for which the commuted variant will be tried as well.
5649 static SDValue visitORCommutative(
5650     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5651   EVT VT = N0.getValueType();
5652   if (N0.getOpcode() == ISD::AND) {
5653     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5654     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5655       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5656
5657     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5658     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5659       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5660   }
5661
5662   return SDValue();
5663 }
5664
5665 SDValue DAGCombiner::visitOR(SDNode *N) {
5666   SDValue N0 = N->getOperand(0);
5667   SDValue N1 = N->getOperand(1);
5668   EVT VT = N1.getValueType();
5669
5670   // x | x --> x
5671   if (N0 == N1)
5672     return N0;
5673
5674   // fold vector ops
5675   if (VT.isVector()) {
5676     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5677       return FoldedVOp;
5678
5679     // fold (or x, 0) -> x, vector edition
5680     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5681       return N1;
5682     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5683       return N0;
5684
5685     // fold (or x, -1) -> -1, vector edition
5686     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5687       // do not return N0, because undef node may exist in N0
5688       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5689     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5690       // do not return N1, because undef node may exist in N1
5691       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5692
5693     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5694     // Do this only if the resulting shuffle is legal.
5695     if (isa<ShuffleVectorSDNode>(N0) &&
5696         isa<ShuffleVectorSDNode>(N1) &&
5697         // Avoid folding a node with illegal type.
5698         TLI.isTypeLegal(VT)) {
5699       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5700       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5701       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5702       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5703       // Ensure both shuffles have a zero input.
5704       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5705         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5706         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5707         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5708         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5709         bool CanFold = true;
5710         int NumElts = VT.getVectorNumElements();
5711         SmallVector<int, 4> Mask(NumElts);
5712
5713         for (int i = 0; i != NumElts; ++i) {
5714           int M0 = SV0->getMaskElt(i);
5715           int M1 = SV1->getMaskElt(i);
5716
5717           // Determine if either index is pointing to a zero vector.
5718           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5719           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5720
5721           // If one element is zero and the otherside is undef, keep undef.
5722           // This also handles the case that both are undef.
5723           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5724             Mask[i] = -1;
5725             continue;
5726           }
5727
5728           // Make sure only one of the elements is zero.
5729           if (M0Zero == M1Zero) {
5730             CanFold = false;
5731             break;
5732           }
5733
5734           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5735
5736           // We have a zero and non-zero element. If the non-zero came from
5737           // SV0 make the index a LHS index. If it came from SV1, make it
5738           // a RHS index. We need to mod by NumElts because we don't care
5739           // which operand it came from in the original shuffles.
5740           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5741         }
5742
5743         if (CanFold) {
5744           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5745           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5746
5747           SDValue LegalShuffle =
5748               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
5749                                           Mask, DAG);
5750           if (LegalShuffle)
5751             return LegalShuffle;
5752         }
5753       }
5754     }
5755   }
5756
5757   // fold (or c1, c2) -> c1|c2
5758   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5759   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5760   if (N0C && N1C && !N1C->isOpaque())
5761     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5762   // canonicalize constant to RHS
5763   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5764      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5765     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5766   // fold (or x, 0) -> x
5767   if (isNullConstant(N1))
5768     return N0;
5769   // fold (or x, -1) -> -1
5770   if (isAllOnesConstant(N1))
5771     return N1;
5772
5773   if (SDValue NewSel = foldBinOpIntoSelect(N))
5774     return NewSel;
5775
5776   // fold (or x, c) -> c iff (x & ~c) == 0
5777   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5778     return N1;
5779
5780   if (SDValue Combined = visitORLike(N0, N1, N))
5781     return Combined;
5782
5783   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5784   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5785     return BSwap;
5786   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5787     return BSwap;
5788
5789   // reassociate or
5790   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5791     return ROR;
5792
5793   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5794   // iff (c1 & c2) != 0 or c1/c2 are undef.
5795   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5796     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5797   };
5798   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5799       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5800     if (SDValue COR = DAG.FoldConstantArithmetic(
5801             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5802       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5803       AddToWorklist(IOR.getNode());
5804       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5805     }
5806   }
5807
5808   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5809     return Combined;
5810   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5811     return Combined;
5812
5813   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
5814   if (N0.getOpcode() == N1.getOpcode())
5815     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5816       return V;
5817
5818   // See if this is some rotate idiom.
5819   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5820     return SDValue(Rot, 0);
5821
5822   if (SDValue Load = MatchLoadCombine(N))
5823     return Load;
5824
5825   // Simplify the operands using demanded-bits information.
5826   if (SimplifyDemandedBits(SDValue(N, 0)))
5827     return SDValue(N, 0);
5828
5829   // If OR can be rewritten into ADD, try combines based on ADD.
5830   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5831       DAG.haveNoCommonBitsSet(N0, N1))
5832     if (SDValue Combined = visitADDLike(N))
5833       return Combined;
5834
5835   return SDValue();
5836 }
5837
5838 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5839   if (Op.getOpcode() == ISD::AND &&
5840       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5841     Mask = Op.getOperand(1);
5842     return Op.getOperand(0);
5843   }
5844   return Op;
5845 }
5846
5847 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5848 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5849                             SDValue &Mask) {
5850   Op = stripConstantMask(DAG, Op, Mask);
5851   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5852     Shift = Op;
5853     return true;
5854   }
5855   return false;
5856 }
5857
5858 /// Helper function for visitOR to extract the needed side of a rotate idiom
5859 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5860 /// InstCombine merged some outside op with one of the shifts from
5861 /// the rotate pattern.
5862 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5863 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5864 /// patterns:
5865 ///
5866 ///   (or (add v v) (shrl v bitwidth-1)):
5867 ///     expands (add v v) -> (shl v 1)
5868 ///
5869 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5870 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5871 ///
5872 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5873 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5874 ///
5875 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5876 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5877 ///
5878 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5879 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5880 ///
5881 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5882 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5883                                      SDValue ExtractFrom, SDValue &Mask,
5884                                      const SDLoc &DL) {
5885   assert(OppShift && ExtractFrom && "Empty SDValue");
5886   assert(
5887       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5888       "Existing shift must be valid as a rotate half");
5889
5890   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5891
5892   // Value and Type of the shift.
5893   SDValue OppShiftLHS = OppShift.getOperand(0);
5894   EVT ShiftedVT = OppShiftLHS.getValueType();
5895
5896   // Amount of the existing shift.
5897   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5898
5899   // (add v v) -> (shl v 1)
5900   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
5901       ExtractFrom.getOpcode() == ISD::ADD &&
5902       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
5903       ExtractFrom.getOperand(0) == OppShiftLHS &&
5904       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
5905     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
5906                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
5907
5908   // Preconditions:
5909   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5910   //
5911   // Find opcode of the needed shift to be extracted from (op0 v c0).
5912   unsigned Opcode = ISD::DELETED_NODE;
5913   bool IsMulOrDiv = false;
5914   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5915   // opcode or its arithmetic (mul or udiv) variant.
5916   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5917     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5918     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5919       return false;
5920     Opcode = NeededShift;
5921     return true;
5922   };
5923   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5924   // that the needed shift can be extracted from.
5925   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5926       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5927     return SDValue();
5928
5929   // op0 must be the same opcode on both sides, have the same LHS argument,
5930   // and produce the same value type.
5931   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5932       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5933       ShiftedVT != ExtractFrom.getValueType())
5934     return SDValue();
5935
5936   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5937   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5938   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5939   ConstantSDNode *ExtractFromCst =
5940       isConstOrConstSplat(ExtractFrom.getOperand(1));
5941   // TODO: We should be able to handle non-uniform constant vectors for these values
5942   // Check that we have constant values.
5943   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5944       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5945       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5946     return SDValue();
5947
5948   // Compute the shift amount we need to extract to complete the rotate.
5949   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5950   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5951     return SDValue();
5952   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5953   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5954   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5955   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5956   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5957
5958   // Now try extract the needed shift from the ExtractFrom op and see if the
5959   // result matches up with the existing shift's LHS op.
5960   if (IsMulOrDiv) {
5961     // Op to extract from is a mul or udiv by a constant.
5962     // Check:
5963     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5964     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5965     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5966                                                  NeededShiftAmt.getZExtValue());
5967     APInt ResultAmt;
5968     APInt Rem;
5969     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5970     if (Rem != 0 || ResultAmt != OppLHSAmt)
5971       return SDValue();
5972   } else {
5973     // Op to extract from is a shift by a constant.
5974     // Check:
5975     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5976     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5977                                           ExtractFromAmt.getBitWidth()))
5978       return SDValue();
5979   }
5980
5981   // Return the expanded shift op that should allow a rotate to be formed.
5982   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5983   EVT ResVT = ExtractFrom.getValueType();
5984   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5985   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5986 }
5987
5988 // Return true if we can prove that, whenever Neg and Pos are both in the
5989 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5990 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5991 //
5992 //     (or (shift1 X, Neg), (shift2 X, Pos))
5993 //
5994 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5995 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
5996 // to consider shift amounts with defined behavior.
5997 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5998                            SelectionDAG &DAG) {
5999   // If EltSize is a power of 2 then:
6000   //
6001   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6002   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6003   //
6004   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6005   // for the stronger condition:
6006   //
6007   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6008   //
6009   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6010   // we can just replace Neg with Neg' for the rest of the function.
6011   //
6012   // In other cases we check for the even stronger condition:
6013   //
6014   //     Neg == EltSize - Pos                                    [B]
6015   //
6016   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6017   // behavior if Pos == 0 (and consequently Neg == EltSize).
6018   //
6019   // We could actually use [A] whenever EltSize is a power of 2, but the
6020   // only extra cases that it would match are those uninteresting ones
6021   // where Neg and Pos are never in range at the same time.  E.g. for
6022   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6023   // as well as (sub 32, Pos), but:
6024   //
6025   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6026   //
6027   // always invokes undefined behavior for 32-bit X.
6028   //
6029   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6030   unsigned MaskLoBits = 0;
6031   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6032     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6033       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6034       unsigned Bits = Log2_64(EltSize);
6035       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6036           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6037         Neg = Neg.getOperand(0);
6038         MaskLoBits = Bits;
6039       }
6040     }
6041   }
6042
6043   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6044   if (Neg.getOpcode() != ISD::SUB)
6045     return false;
6046   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6047   if (!NegC)
6048     return false;
6049   SDValue NegOp1 = Neg.getOperand(1);
6050
6051   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6052   // Pos'.  The truncation is redundant for the purpose of the equality.
6053   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6054     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6055       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6056       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6057           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6058            MaskLoBits))
6059         Pos = Pos.getOperand(0);
6060     }
6061   }
6062
6063   // The condition we need is now:
6064   //
6065   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6066   //
6067   // If NegOp1 == Pos then we need:
6068   //
6069   //              EltSize & Mask == NegC & Mask
6070   //
6071   // (because "x & Mask" is a truncation and distributes through subtraction).
6072   APInt Width;
6073   if (Pos == NegOp1)
6074     Width = NegC->getAPIntValue();
6075
6076   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6077   // Then the condition we want to prove becomes:
6078   //
6079   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6080   //
6081   // which, again because "x & Mask" is a truncation, becomes:
6082   //
6083   //                NegC & Mask == (EltSize - PosC) & Mask
6084   //             EltSize & Mask == (NegC + PosC) & Mask
6085   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6086     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6087       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6088     else
6089       return false;
6090   } else
6091     return false;
6092
6093   // Now we just need to check that EltSize & Mask == Width & Mask.
6094   if (MaskLoBits)
6095     // EltSize & Mask is 0 since Mask is EltSize - 1.
6096     return Width.getLoBits(MaskLoBits) == 0;
6097   return Width == EltSize;
6098 }
6099
6100 // A subroutine of MatchRotate used once we have found an OR of two opposite
6101 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6102 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6103 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6104 // Neg with outer conversions stripped away.
6105 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6106                                        SDValue Neg, SDValue InnerPos,
6107                                        SDValue InnerNeg, unsigned PosOpcode,
6108                                        unsigned NegOpcode, const SDLoc &DL) {
6109   // fold (or (shl x, (*ext y)),
6110   //          (srl x, (*ext (sub 32, y)))) ->
6111   //   (rotl x, y) or (rotr x, (sub 32, y))
6112   //
6113   // fold (or (shl x, (*ext (sub 32, y))),
6114   //          (srl x, (*ext y))) ->
6115   //   (rotr x, y) or (rotl x, (sub 32, y))
6116   EVT VT = Shifted.getValueType();
6117   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6118     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6119     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6120                        HasPos ? Pos : Neg).getNode();
6121   }
6122
6123   return nullptr;
6124 }
6125
6126 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6127 // idioms for rotate, and if the target supports rotation instructions, generate
6128 // a rot[lr].
6129 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6130   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6131   EVT VT = LHS.getValueType();
6132   if (!TLI.isTypeLegal(VT)) return nullptr;
6133
6134   // The target must have at least one rotate flavor.
6135   bool HasROTL = hasOperation(ISD::ROTL, VT);
6136   bool HasROTR = hasOperation(ISD::ROTR, VT);
6137   if (!HasROTL && !HasROTR) return nullptr;
6138
6139   // Check for truncated rotate.
6140   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6141       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6142     assert(LHS.getValueType() == RHS.getValueType());
6143     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6144       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
6145                          SDValue(Rot, 0)).getNode();
6146     }
6147   }
6148
6149   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6150   SDValue LHSShift;   // The shift.
6151   SDValue LHSMask;    // AND value if any.
6152   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6153
6154   SDValue RHSShift;   // The shift.
6155   SDValue RHSMask;    // AND value if any.
6156   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6157
6158   // If neither side matched a rotate half, bail
6159   if (!LHSShift && !RHSShift)
6160     return nullptr;
6161
6162   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6163   // side of the rotate, so try to handle that here. In all cases we need to
6164   // pass the matched shift from the opposite side to compute the opcode and
6165   // needed shift amount to extract.  We still want to do this if both sides
6166   // matched a rotate half because one half may be a potential overshift that
6167   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6168   // single one).
6169
6170   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6171   if (LHSShift)
6172     if (SDValue NewRHSShift =
6173             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6174       RHSShift = NewRHSShift;
6175   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6176   if (RHSShift)
6177     if (SDValue NewLHSShift =
6178             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6179       LHSShift = NewLHSShift;
6180
6181   // If a side is still missing, nothing else we can do.
6182   if (!RHSShift || !LHSShift)
6183     return nullptr;
6184
6185   // At this point we've matched or extracted a shift op on each side.
6186
6187   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6188     return nullptr;   // Not shifting the same value.
6189
6190   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6191     return nullptr;   // Shifts must disagree.
6192
6193   // Canonicalize shl to left side in a shl/srl pair.
6194   if (RHSShift.getOpcode() == ISD::SHL) {
6195     std::swap(LHS, RHS);
6196     std::swap(LHSShift, RHSShift);
6197     std::swap(LHSMask, RHSMask);
6198   }
6199
6200   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6201   SDValue LHSShiftArg = LHSShift.getOperand(0);
6202   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6203   SDValue RHSShiftArg = RHSShift.getOperand(0);
6204   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6205
6206   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6207   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6208   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6209                                         ConstantSDNode *RHS) {
6210     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6211   };
6212   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6213     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6214                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6215
6216     // If there is an AND of either shifted operand, apply it to the result.
6217     if (LHSMask.getNode() || RHSMask.getNode()) {
6218       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6219       SDValue Mask = AllOnes;
6220
6221       if (LHSMask.getNode()) {
6222         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6223         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6224                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6225       }
6226       if (RHSMask.getNode()) {
6227         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6228         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6229                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6230       }
6231
6232       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6233     }
6234
6235     return Rot.getNode();
6236   }
6237
6238   // If there is a mask here, and we have a variable shift, we can't be sure
6239   // that we're masking out the right stuff.
6240   if (LHSMask.getNode() || RHSMask.getNode())
6241     return nullptr;
6242
6243   // If the shift amount is sign/zext/any-extended just peel it off.
6244   SDValue LExtOp0 = LHSShiftAmt;
6245   SDValue RExtOp0 = RHSShiftAmt;
6246   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6247        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6248        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6249        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6250       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6251        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6252        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6253        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6254     LExtOp0 = LHSShiftAmt.getOperand(0);
6255     RExtOp0 = RHSShiftAmt.getOperand(0);
6256   }
6257
6258   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6259                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6260   if (TryL)
6261     return TryL;
6262
6263   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6264                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6265   if (TryR)
6266     return TryR;
6267
6268   return nullptr;
6269 }
6270
6271 namespace {
6272
6273 /// Represents known origin of an individual byte in load combine pattern. The
6274 /// value of the byte is either constant zero or comes from memory.
6275 struct ByteProvider {
6276   // For constant zero providers Load is set to nullptr. For memory providers
6277   // Load represents the node which loads the byte from memory.
6278   // ByteOffset is the offset of the byte in the value produced by the load.
6279   LoadSDNode *Load = nullptr;
6280   unsigned ByteOffset = 0;
6281
6282   ByteProvider() = default;
6283
6284   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6285     return ByteProvider(Load, ByteOffset);
6286   }
6287
6288   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6289
6290   bool isConstantZero() const { return !Load; }
6291   bool isMemory() const { return Load; }
6292
6293   bool operator==(const ByteProvider &Other) const {
6294     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6295   }
6296
6297 private:
6298   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6299       : Load(Load), ByteOffset(ByteOffset) {}
6300 };
6301
6302 } // end anonymous namespace
6303
6304 /// Recursively traverses the expression calculating the origin of the requested
6305 /// byte of the given value. Returns None if the provider can't be calculated.
6306 ///
6307 /// For all the values except the root of the expression verifies that the value
6308 /// has exactly one use and if it's not true return None. This way if the origin
6309 /// of the byte is returned it's guaranteed that the values which contribute to
6310 /// the byte are not used outside of this expression.
6311 ///
6312 /// Because the parts of the expression are not allowed to have more than one
6313 /// use this function iterates over trees, not DAGs. So it never visits the same
6314 /// node more than once.
6315 static const Optional<ByteProvider>
6316 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6317                       bool Root = false) {
6318   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6319   if (Depth == 10)
6320     return None;
6321
6322   if (!Root && !Op.hasOneUse())
6323     return None;
6324
6325   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6326   unsigned BitWidth = Op.getValueSizeInBits();
6327   if (BitWidth % 8 != 0)
6328     return None;
6329   unsigned ByteWidth = BitWidth / 8;
6330   assert(Index < ByteWidth && "invalid index requested");
6331   (void) ByteWidth;
6332
6333   switch (Op.getOpcode()) {
6334   case ISD::OR: {
6335     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6336     if (!LHS)
6337       return None;
6338     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6339     if (!RHS)
6340       return None;
6341
6342     if (LHS->isConstantZero())
6343       return RHS;
6344     if (RHS->isConstantZero())
6345       return LHS;
6346     return None;
6347   }
6348   case ISD::SHL: {
6349     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6350     if (!ShiftOp)
6351       return None;
6352
6353     uint64_t BitShift = ShiftOp->getZExtValue();
6354     if (BitShift % 8 != 0)
6355       return None;
6356     uint64_t ByteShift = BitShift / 8;
6357
6358     return Index < ByteShift
6359                ? ByteProvider::getConstantZero()
6360                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6361                                        Depth + 1);
6362   }
6363   case ISD::ANY_EXTEND:
6364   case ISD::SIGN_EXTEND:
6365   case ISD::ZERO_EXTEND: {
6366     SDValue NarrowOp = Op->getOperand(0);
6367     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6368     if (NarrowBitWidth % 8 != 0)
6369       return None;
6370     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6371
6372     if (Index >= NarrowByteWidth)
6373       return Op.getOpcode() == ISD::ZERO_EXTEND
6374                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6375                  : None;
6376     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6377   }
6378   case ISD::BSWAP:
6379     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6380                                  Depth + 1);
6381   case ISD::LOAD: {
6382     auto L = cast<LoadSDNode>(Op.getNode());
6383     if (!L->isSimple() || L->isIndexed())
6384       return None;
6385
6386     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6387     if (NarrowBitWidth % 8 != 0)
6388       return None;
6389     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6390
6391     if (Index >= NarrowByteWidth)
6392       return L->getExtensionType() == ISD::ZEXTLOAD
6393                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6394                  : None;
6395     return ByteProvider::getMemory(L, Index);
6396   }
6397   }
6398
6399   return None;
6400 }
6401
// Memory position of byte i (counted from the least significant byte) of a
// BW-byte value in little endian layout. The width does not affect the result.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW;
  const unsigned Position = i;
  return Position;
}
6405
// Memory position of byte i (counted from the least significant byte) of a
// BW-byte value in big endian layout: the index is mirrored within the width.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastByte = BW - 1;
  return LastByte - i;
}
6409
6410 // Check if the bytes offsets we are looking at match with either big or
6411 // little endian value loaded. Return true for big endian, false for little
6412 // endian, and None if match failed.
6413 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6414                                   int64_t FirstOffset) {
6415   // The endian can be decided only when it is 2 bytes at least.
6416   unsigned Width = ByteOffsets.size();
6417   if (Width < 2)
6418     return None;
6419
6420   bool BigEndian = true, LittleEndian = true;
6421   for (unsigned i = 0; i < Width; i++) {
6422     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6423     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6424     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6425     if (!BigEndian && !LittleEndian)
6426       return None;
6427   }
6428
6429   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6430                                         "little endian");
6431   return BigEndian;
6432 }
6433
6434 static SDValue stripTruncAndExt(SDValue Value) {
6435   switch (Value.getOpcode()) {
6436   case ISD::TRUNCATE:
6437   case ISD::ZERO_EXTEND:
6438   case ISD::SIGN_EXTEND:
6439   case ISD::ANY_EXTEND:
6440     return stripTruncAndExt(Value.getOperand(0));
6441   }
6442   return Value;
6443 }
6444
6445 /// Match a pattern where a wide type scalar value is stored by several narrow
6446 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6447 /// supports it.
6448 ///
6449 /// Assuming little endian target:
6450 ///  i8 *p = ...
6451 ///  i32 val = ...
6452 ///  p[0] = (val >> 0) & 0xFF;
6453 ///  p[1] = (val >> 8) & 0xFF;
6454 ///  p[2] = (val >> 16) & 0xFF;
6455 ///  p[3] = (val >> 24) & 0xFF;
6456 /// =>
6457 ///  *((i32)p) = val;
6458 ///
6459 ///  i8 *p = ...
6460 ///  i32 val = ...
6461 ///  p[0] = (val >> 24) & 0xFF;
6462 ///  p[1] = (val >> 16) & 0xFF;
6463 ///  p[2] = (val >> 8) & 0xFF;
6464 ///  p[3] = (val >> 0) & 0xFF;
6465 /// =>
6466 ///  *((i32)p) = BSWAP(val);
6467 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6468   // Collect all the stores in the chain.
6469   SDValue Chain;
6470   SmallVector<StoreSDNode *, 8> Stores;
6471   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6472     // TODO: Allow unordered atomics when wider type is legal (see D66309)
6473     if (Store->getMemoryVT() != MVT::i8 ||
6474         !Store->isSimple() || Store->isIndexed())
6475       return SDValue();
6476     Stores.push_back(Store);
6477     Chain = Store->getChain();
6478   }
6479   // Handle the simple type only.
6480   unsigned Width = Stores.size();
6481   EVT VT = EVT::getIntegerVT(
6482     *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6483   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6484     return SDValue();
6485
6486   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6487   if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6488     return SDValue();
6489
6490   // Check if all the bytes of the combined value we are looking at are stored
6491   // to the same base address. Collect bytes offsets from Base address into
6492   // ByteOffsets.
6493   SDValue CombinedValue;
6494   SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
6495   int64_t FirstOffset = INT64_MAX;
6496   StoreSDNode *FirstStore = nullptr;
6497   Optional<BaseIndexOffset> Base;
6498   for (auto Store : Stores) {
6499     // All the stores store different byte of the CombinedValue. A truncate is
6500     // required to get that byte value.
6501     SDValue Trunc = Store->getValue();
6502     if (Trunc.getOpcode() != ISD::TRUNCATE)
6503       return SDValue();
6504     // A shift operation is required to get the right byte offset, except the
6505     // first byte.
6506     int64_t Offset = 0;
6507     SDValue Value = Trunc.getOperand(0);
6508     if (Value.getOpcode() == ISD::SRL ||
6509         Value.getOpcode() == ISD::SRA) {
6510       ConstantSDNode *ShiftOffset =
6511         dyn_cast<ConstantSDNode>(Value.getOperand(1));
6512       // Trying to match the following pattern. The shift offset must be
6513       // a constant and a multiple of 8. It is the byte offset in "y".
6514       //
6515       // x = srl y, offset
6516       // i8 z = trunc x
6517       // store z, ...
6518       if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6519         return SDValue();
6520
6521      Offset = ShiftOffset->getSExtValue()/8;
6522      Value = Value.getOperand(0);
6523     }
6524
6525     // Stores must share the same combined value with different offsets.
6526     if (!CombinedValue)
6527       CombinedValue = Value;
6528     else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6529       return SDValue();
6530
6531     // The trunc and all the extend operation should be stripped to get the
6532     // real value we are stored.
6533     else if (CombinedValue.getValueType() != VT) {
6534       if (Value.getValueType() == VT ||
6535           Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6536         CombinedValue = Value;
6537       // Give up if the combined value type is smaller than the store size.
6538       if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6539         return SDValue();
6540     }
6541
6542     // Stores must share the same base address
6543     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6544     int64_t ByteOffsetFromBase = 0;
6545     if (!Base)
6546       Base = Ptr;
6547     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6548       return SDValue();
6549
6550     // Remember the first byte store
6551     if (ByteOffsetFromBase < FirstOffset) {
6552       FirstStore = Store;
6553       FirstOffset = ByteOffsetFromBase;
6554     }
6555     // Map the offset in the store and the offset in the combined value, and
6556     // early return if it has been set before.
6557     if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6558       return SDValue();
6559     ByteOffsets[Offset] = ByteOffsetFromBase;
6560   }
6561
6562   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6563   assert(FirstStore && "First store must be set");
6564
6565   // Check if the bytes of the combined value we are looking at match with
6566   // either big or little endian value store.
6567   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6568   if (!IsBigEndian.hasValue())
6569     return SDValue();
6570
6571   // The node we are looking at matches with the pattern, check if we can
6572   // replace it with a single bswap if needed and store.
6573
6574   // If the store needs byte swap check if the target supports it
6575   bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6576
6577   // Before legalize we can introduce illegal bswaps which will be later
6578   // converted to an explicit bswap sequence. This way we end up with a single
6579   // store and byte shuffling instead of several stores and byte shuffling.
6580   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6581     return SDValue();
6582
6583   // Check that a store of the wide type is both allowed and fast on the target
6584   bool Fast = false;
6585   bool Allowed =
6586       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6587                              *FirstStore->getMemOperand(), &Fast);
6588   if (!Allowed || !Fast)
6589     return SDValue();
6590
6591   if (VT != CombinedValue.getValueType()) {
6592     assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6593            "Get unexpected store value to combine");
6594     CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6595                              CombinedValue);
6596   }
6597
6598   if (NeedsBswap)
6599     CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6600
6601   SDValue NewStore =
6602     DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
6603                  FirstStore->getPointerInfo(), FirstStore->getAlignment());
6604
6605   // Rely on other DAG combine rules to remove the other individual stores.
6606   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6607   return NewStore;
6608 }
6609
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
6613 ///
6614 /// Assuming little endian target:
6615 ///  i8 *a = ...
6616 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6617 /// =>
6618 ///  i32 val = *((i32)a)
6619 ///
6620 ///  i8 *a = ...
6621 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6622 /// =>
6623 ///  i32 val = BSWAP(*((i32)a))
6624 ///
6625 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6626 /// interact well with the worklist mechanism. When a part of the pattern is
6627 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6628 /// but the root node of the pattern which triggers the load combine is not
6629 /// necessarily a direct user of the changed node. For example, once the address
6630 /// of t28 load is reassociated load combine won't be triggered:
6631 ///             t25: i32 = add t4, Constant:i32<2>
6632 ///           t26: i64 = sign_extend t25
6633 ///        t27: i64 = add t2, t26
6634 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6635 ///     t29: i32 = zero_extend t28
6636 ///   t32: i32 = shl t29, Constant:i8<8>
6637 /// t33: i32 = or t23, t32
6638 /// As a possible fix visitLoad can check if the load can be a part of a load
6639 /// combine pattern and add corresponding OR roots to the worklist.
6640 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6641   assert(N->getOpcode() == ISD::OR &&
6642          "Can only match load combining against OR nodes");
6643
6644   // Handles simple types only
6645   EVT VT = N->getValueType(0);
6646   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6647     return SDValue();
6648   unsigned ByteWidth = VT.getSizeInBits() / 8;
6649
6650   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6651   // Before legalize we can introduce too wide illegal loads which will be later
6652   // split into legal sized loads. This enables us to combine i64 load by i8
6653   // patterns to a couple of i32 loads on 32 bit targets.
6654   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6655     return SDValue();
6656
6657   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6658   auto MemoryByteOffset = [&] (ByteProvider P) {
6659     assert(P.isMemory() && "Must be a memory byte provider");
6660     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6661     assert(LoadBitWidth % 8 == 0 &&
6662            "can only analyze providers for individual bytes not bit");
6663     unsigned LoadByteWidth = LoadBitWidth / 8;
6664     return IsBigEndianTarget
6665             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6666             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6667   };
6668
6669   Optional<BaseIndexOffset> Base;
6670   SDValue Chain;
6671
6672   SmallPtrSet<LoadSDNode *, 8> Loads;
6673   Optional<ByteProvider> FirstByteProvider;
6674   int64_t FirstOffset = INT64_MAX;
6675
6676   // Check if all the bytes of the OR we are looking at are loaded from the same
6677   // base address. Collect bytes offsets from Base address in ByteOffsets.
6678   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6679   for (unsigned i = 0; i < ByteWidth; i++) {
6680     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6681     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6682       return SDValue();
6683
6684     LoadSDNode *L = P->Load;
6685     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
6686            !L->isIndexed() &&
6687            "Must be enforced by calculateByteProvider");
6688     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6689
6690     // All loads must share the same chain
6691     SDValue LChain = L->getChain();
6692     if (!Chain)
6693       Chain = LChain;
6694     else if (Chain != LChain)
6695       return SDValue();
6696
6697     // Loads must share the same base address
6698     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6699     int64_t ByteOffsetFromBase = 0;
6700     if (!Base)
6701       Base = Ptr;
6702     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6703       return SDValue();
6704
6705     // Calculate the offset of the current byte from the base address
6706     ByteOffsetFromBase += MemoryByteOffset(*P);
6707     ByteOffsets[i] = ByteOffsetFromBase;
6708
6709     // Remember the first byte load
6710     if (ByteOffsetFromBase < FirstOffset) {
6711       FirstByteProvider = P;
6712       FirstOffset = ByteOffsetFromBase;
6713     }
6714
6715     Loads.insert(L);
6716   }
6717   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6718          "memory, so there must be at least one load which produces the value");
6719   assert(Base && "Base address of the accessed memory location must be set");
6720   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6721
6722   // Check if the bytes of the OR we are looking at match with either big or
6723   // little endian value load
6724   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6725   if (!IsBigEndian.hasValue())
6726     return SDValue();
6727
6728   assert(FirstByteProvider && "must be set");
6729
6730   // Ensure that the first byte is loaded from zero offset of the first load.
6731   // So the combined value can be loaded from the first load address.
6732   if (MemoryByteOffset(*FirstByteProvider) != 0)
6733     return SDValue();
6734   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6735
6736   // The node we are looking at matches with the pattern, check if we can
6737   // replace it with a single load and bswap if needed.
6738
6739   // If the load needs byte swap check if the target supports it
6740   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6741
6742   // Before legalize we can introduce illegal bswaps which will be later
6743   // converted to an explicit bswap sequence. This way we end up with a single
6744   // load and byte shuffling instead of several loads and byte shuffling.
6745   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6746     return SDValue();
6747
6748   // Check that a load of the wide type is both allowed and fast on the target
6749   bool Fast = false;
6750   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6751                                         VT, *FirstLoad->getMemOperand(), &Fast);
6752   if (!Allowed || !Fast)
6753     return SDValue();
6754
6755   SDValue NewLoad =
6756       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6757                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6758
6759   // Transfer chain users from old loads to the new load.
6760   for (LoadSDNode *L : Loads)
6761     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6762
6763   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6764 }
6765
6766 // If the target has andn, bsl, or a similar bit-select instruction,
6767 // we want to unfold masked merge, with canonical pattern of:
6768 //   |        A  |  |B|
6769 //   ((x ^ y) & m) ^ y
6770 //    |  D  |
6771 // Into:
6772 //   (x & m) | (y & ~m)
6773 // If y is a constant, and the 'andn' does not work with immediates,
6774 // we unfold into a different pattern:
6775 //   ~(~x & m) & (m | y)
6776 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6777 //       the very least that breaks andnpd / andnps patterns, and because those
6778 //       patterns are simplified in IR and shouldn't be created in the DAG
6779 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6780   assert(N->getOpcode() == ISD::XOR);
6781
6782   // Don't touch 'not' (i.e. where y = -1).
6783   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6784     return SDValue();
6785
6786   EVT VT = N->getValueType(0);
6787
6788   // There are 3 commutable operators in the pattern,
6789   // so we have to deal with 8 possible variants of the basic pattern.
6790   SDValue X, Y, M;
6791   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6792     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6793       return false;
6794     SDValue Xor = And.getOperand(XorIdx);
6795     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6796       return false;
6797     SDValue Xor0 = Xor.getOperand(0);
6798     SDValue Xor1 = Xor.getOperand(1);
6799     // Don't touch 'not' (i.e. where y = -1).
6800     if (isAllOnesOrAllOnesSplat(Xor1))
6801       return false;
6802     if (Other == Xor0)
6803       std::swap(Xor0, Xor1);
6804     if (Other != Xor1)
6805       return false;
6806     X = Xor0;
6807     Y = Xor1;
6808     M = And.getOperand(XorIdx ? 0 : 1);
6809     return true;
6810   };
6811
6812   SDValue N0 = N->getOperand(0);
6813   SDValue N1 = N->getOperand(1);
6814   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6815       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6816     return SDValue();
6817
6818   // Don't do anything if the mask is constant. This should not be reachable.
6819   // InstCombine should have already unfolded this pattern, and DAGCombiner
6820   // probably shouldn't produce it, too.
6821   if (isa<ConstantSDNode>(M.getNode()))
6822     return SDValue();
6823
6824   // We can transform if the target has AndNot
6825   if (!TLI.hasAndNot(M))
6826     return SDValue();
6827
6828   SDLoc DL(N);
6829
6830   // If Y is a constant, check that 'andn' works with immediates.
6831   if (!TLI.hasAndNot(Y)) {
6832     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6833     // If not, we need to do a bit more work to make sure andn is still used.
6834     SDValue NotX = DAG.getNOT(DL, X, VT);
6835     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6836     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6837     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6838     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6839   }
6840
6841   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6842   SDValue NotM = DAG.getNOT(DL, M, VT);
6843   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6844
6845   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6846 }
6847
6848 SDValue DAGCombiner::visitXOR(SDNode *N) {
6849   SDValue N0 = N->getOperand(0);
6850   SDValue N1 = N->getOperand(1);
6851   EVT VT = N0.getValueType();
6852
6853   // fold vector ops
6854   if (VT.isVector()) {
6855     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6856       return FoldedVOp;
6857
6858     // fold (xor x, 0) -> x, vector edition
6859     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6860       return N1;
6861     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6862       return N0;
6863   }
6864
6865   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6866   SDLoc DL(N);
6867   if (N0.isUndef() && N1.isUndef())
6868     return DAG.getConstant(0, DL, VT);
6869   // fold (xor x, undef) -> undef
6870   if (N0.isUndef())
6871     return N0;
6872   if (N1.isUndef())
6873     return N1;
6874   // fold (xor c1, c2) -> c1^c2
6875   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6876   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6877   if (N0C && N1C)
6878     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6879   // canonicalize constant to RHS
6880   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6881      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6882     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6883   // fold (xor x, 0) -> x
6884   if (isNullConstant(N1))
6885     return N0;
6886
6887   if (SDValue NewSel = foldBinOpIntoSelect(N))
6888     return NewSel;
6889
6890   // reassociate xor
6891   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6892     return RXOR;
6893
6894   // fold !(x cc y) -> (x !cc y)
6895   unsigned N0Opcode = N0.getOpcode();
6896   SDValue LHS, RHS, CC;
6897   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6898     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6899                                                LHS.getValueType().isInteger());
6900     if (!LegalOperations ||
6901         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6902       switch (N0Opcode) {
6903       default:
6904         llvm_unreachable("Unhandled SetCC Equivalent!");
6905       case ISD::SETCC:
6906         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6907       case ISD::SELECT_CC:
6908         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6909                                N0.getOperand(3), NotCC);
6910       }
6911     }
6912   }
6913
6914   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6915   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6916       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6917     SDValue V = N0.getOperand(0);
6918     SDLoc DL0(N0);
6919     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6920                     DAG.getConstant(1, DL0, V.getValueType()));
6921     AddToWorklist(V.getNode());
6922     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6923   }
6924
6925   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6926   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6927       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6928     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
6929     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
6930       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6931       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
6932       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
6933       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
6934       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
6935     }
6936   }
6937   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6938   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6939       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6940     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
6941     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
6942       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6943       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
6944       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
6945       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
6946       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
6947     }
6948   }
6949
6950   // fold (not (neg x)) -> (add X, -1)
6951   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
6952   // Y is a constant or the subtract has a single use.
6953   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
6954       isNullConstant(N0.getOperand(0))) {
6955     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
6956                        DAG.getAllOnesConstant(DL, VT));
6957   }
6958
6959   // fold (xor (and x, y), y) -> (and (not x), y)
6960   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6961     SDValue X = N0.getOperand(0);
6962     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6963     AddToWorklist(NotX.getNode());
6964     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6965   }
6966
6967   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6968     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6969     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6970     unsigned BitWidth = VT.getScalarSizeInBits();
6971     if (XorC && ShiftC) {
6972       // Don't crash on an oversized shift. We can not guarantee that a bogus
6973       // shift has been simplified to undef.
6974       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6975       if (ShiftAmt < BitWidth) {
6976         APInt Ones = APInt::getAllOnesValue(BitWidth);
6977         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6978         if (XorC->getAPIntValue() == Ones) {
6979           // If the xor constant is a shifted -1, do a 'not' before the shift:
6980           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6981           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6982           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6983           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6984         }
6985       }
6986     }
6987   }
6988
6989   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
6990   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6991     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6992     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6993     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6994       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6995       SDValue S0 = S.getOperand(0);
6996       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6997         unsigned OpSizeInBits = VT.getScalarSizeInBits();
6998         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6999           if (C->getAPIntValue() == (OpSizeInBits - 1))
7000             return DAG.getNode(ISD::ABS, DL, VT, S0);
7001       }
7002     }
7003   }
7004
7005   // fold (xor x, x) -> 0
7006   if (N0 == N1)
7007     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7008
7009   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7010   // Here is a concrete example of this equivalence:
7011   // i16   x ==  14
7012   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7013   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7014   //
7015   // =>
7016   //
7017   // i16     ~1      == 0b1111111111111110
7018   // i16 rol(~1, 14) == 0b1011111111111111
7019   //
7020   // Some additional tips to help conceptualize this transform:
7021   // - Try to see the operation as placing a single zero in a value of all ones.
7022   // - There exists no value for x which would allow the result to contain zero.
7023   // - Values of x larger than the bitwidth are undefined and do not require a
7024   //   consistent result.
7025   // - Pushing the zero left requires shifting one bits in from the right.
7026   // A rotate left of ~1 is a nice way of achieving the desired result.
7027   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7028       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7029     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7030                        N0.getOperand(1));
7031   }
7032
7033   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7034   if (N0Opcode == N1.getOpcode())
7035     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7036       return V;
7037
7038   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7039   if (SDValue MM = unfoldMaskedMerge(N))
7040     return MM;
7041
7042   // Simplify the expression using non-local knowledge.
7043   if (SimplifyDemandedBits(SDValue(N, 0)))
7044     return SDValue(N, 0);
7045
7046   return SDValue();
7047 }
7048
7049 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7050 /// shift-by-constant operand with identical opcode, we may be able to convert
7051 /// that into 2 independent shifts followed by the logic op. This is a
7052 /// throughput improvement.
7053 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7054   // Match a one-use bitwise logic op.
7055   SDValue LogicOp = Shift->getOperand(0);
7056   if (!LogicOp.hasOneUse())
7057     return SDValue();
7058
7059   unsigned LogicOpcode = LogicOp.getOpcode();
7060   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7061       LogicOpcode != ISD::XOR)
7062     return SDValue();
7063
7064   // Find a matching one-use shift by constant.
7065   unsigned ShiftOpcode = Shift->getOpcode();
7066   SDValue C1 = Shift->getOperand(1);
7067   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7068   assert(C1Node && "Expected a shift with constant operand");
7069   const APInt &C1Val = C1Node->getAPIntValue();
7070   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7071                              const APInt *&ShiftAmtVal) {
7072     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7073       return false;
7074
7075     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7076     if (!ShiftCNode)
7077       return false;
7078
7079     // Capture the shifted operand and shift amount value.
7080     ShiftOp = V.getOperand(0);
7081     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7082
7083     // Shift amount types do not have to match their operand type, so check that
7084     // the constants are the same width.
7085     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7086       return false;
7087
7088     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7089     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7090       return false;
7091
7092     return true;
7093   };
7094
7095   // Logic ops are commutative, so check each operand for a match.
7096   SDValue X, Y;
7097   const APInt *C0Val;
7098   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7099     Y = LogicOp.getOperand(1);
7100   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7101     Y = LogicOp.getOperand(0);
7102   else
7103     return SDValue();
7104
7105   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7106   SDLoc DL(Shift);
7107   EVT VT = Shift->getValueType(0);
7108   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7109   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7110   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7111   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7112   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7113 }
7114
7115 /// Handle transforms common to the three shifts, when the shift amount is a
7116 /// constant.
7117 /// We are looking for: (shift being one of shl/sra/srl)
7118 ///   shift (binop X, C0), C1
7119 /// And want to transform into:
7120 ///   binop (shift X, C1), (shift C0, C1)
7121 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7122   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7123
7124   // Do not turn a 'not' into a regular xor.
7125   if (isBitwiseNot(N->getOperand(0)))
7126     return SDValue();
7127
7128   // The inner binop must be one-use, since we want to replace it.
7129   SDValue LHS = N->getOperand(0);
7130   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7131     return SDValue();
7132
7133   // TODO: This is limited to early combining because it may reveal regressions
7134   //       otherwise. But since we just checked a target hook to see if this is
7135   //       desirable, that should have filtered out cases where this interferes
7136   //       with some other pattern matching.
7137   if (!LegalTypes)
7138     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7139       return R;
7140
7141   // We want to pull some binops through shifts, so that we have (and (shift))
7142   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7143   // thing happens with address calculations, so it's important to canonicalize
7144   // it.
7145   switch (LHS.getOpcode()) {
7146   default:
7147     return SDValue();
7148   case ISD::OR:
7149   case ISD::XOR:
7150   case ISD::AND:
7151     break;
7152   case ISD::ADD:
7153     if (N->getOpcode() != ISD::SHL)
7154       return SDValue(); // only shl(add) not sr[al](add).
7155     break;
7156   }
7157
7158   // We require the RHS of the binop to be a constant and not opaque as well.
7159   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7160   if (!BinOpCst)
7161     return SDValue();
7162
7163   // FIXME: disable this unless the input to the binop is a shift by a constant
7164   // or is copy/select. Enable this in other cases when figure out it's exactly
7165   // profitable.
7166   SDValue BinOpLHSVal = LHS.getOperand(0);
7167   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7168                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7169                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7170                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7171   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7172                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7173
7174   if (!IsShiftByConstant && !IsCopyOrSelect)
7175     return SDValue();
7176
7177   if (IsCopyOrSelect && N->hasOneUse())
7178     return SDValue();
7179
7180   // Fold the constants, shifting the binop RHS by the shift amount.
7181   SDLoc DL(N);
7182   EVT VT = N->getValueType(0);
7183   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7184                                N->getOperand(1));
7185   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7186
7187   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7188                                  N->getOperand(1));
7189   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7190 }
7191
7192 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7193   assert(N->getOpcode() == ISD::TRUNCATE);
7194   assert(N->getOperand(0).getOpcode() == ISD::AND);
7195
7196   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7197   EVT TruncVT = N->getValueType(0);
7198   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7199       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7200     SDValue N01 = N->getOperand(0).getOperand(1);
7201     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7202       SDLoc DL(N);
7203       SDValue N00 = N->getOperand(0).getOperand(0);
7204       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7205       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7206       AddToWorklist(Trunc00.getNode());
7207       AddToWorklist(Trunc01.getNode());
7208       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7209     }
7210   }
7211
7212   return SDValue();
7213 }
7214
7215 SDValue DAGCombiner::visitRotate(SDNode *N) {
7216   SDLoc dl(N);
7217   SDValue N0 = N->getOperand(0);
7218   SDValue N1 = N->getOperand(1);
7219   EVT VT = N->getValueType(0);
7220   unsigned Bitsize = VT.getScalarSizeInBits();
7221
7222   // fold (rot x, 0) -> x
7223   if (isNullOrNullSplat(N1))
7224     return N0;
7225
7226   // fold (rot x, c) -> x iff (c % BitSize) == 0
7227   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7228     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7229     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7230       return N0;
7231   }
7232
7233   // fold (rot x, c) -> (rot x, c % BitSize)
7234   // TODO - support non-uniform vector amounts.
7235   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
7236     if (Cst->getAPIntValue().uge(Bitsize)) {
7237       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
7238       return DAG.getNode(N->getOpcode(), dl, VT, N0,
7239                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
7240     }
7241   }
7242
7243   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7244   if (N1.getOpcode() == ISD::TRUNCATE &&
7245       N1.getOperand(0).getOpcode() == ISD::AND) {
7246     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7247       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7248   }
7249
7250   unsigned NextOp = N0.getOpcode();
7251   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7252   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7253     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7254     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7255     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7256       EVT ShiftVT = C1->getValueType(0);
7257       bool SameSide = (N->getOpcode() == NextOp);
7258       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7259       if (SDValue CombinedShift =
7260               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
7261         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7262         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7263             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
7264             BitsizeC.getNode());
7265         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7266                            CombinedShiftNorm);
7267       }
7268     }
7269   }
7270   return SDValue();
7271 }
7272
7273 SDValue DAGCombiner::visitSHL(SDNode *N) {
7274   SDValue N0 = N->getOperand(0);
7275   SDValue N1 = N->getOperand(1);
7276   if (SDValue V = DAG.simplifyShift(N0, N1))
7277     return V;
7278
7279   EVT VT = N0.getValueType();
7280   EVT ShiftVT = N1.getValueType();
7281   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7282
7283   // fold vector ops
7284   if (VT.isVector()) {
7285     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7286       return FoldedVOp;
7287
7288     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7289     // If setcc produces all-one true value then:
7290     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7291     if (N1CV && N1CV->isConstant()) {
7292       if (N0.getOpcode() == ISD::AND) {
7293         SDValue N00 = N0->getOperand(0);
7294         SDValue N01 = N0->getOperand(1);
7295         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7296
7297         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7298             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7299                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7300           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
7301                                                      N01CV, N1CV))
7302             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7303         }
7304       }
7305     }
7306   }
7307
7308   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7309
7310   // fold (shl c1, c2) -> c1<<c2
7311   // TODO - support non-uniform vector shift amounts.
7312   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7313   if (N0C && N1C && !N1C->isOpaque())
7314     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
7315
7316   if (SDValue NewSel = foldBinOpIntoSelect(N))
7317     return NewSel;
7318
7319   // if (shl x, c) is known to be zero, return 0
7320   if (DAG.MaskedValueIsZero(SDValue(N, 0),
7321                             APInt::getAllOnesValue(OpSizeInBits)))
7322     return DAG.getConstant(0, SDLoc(N), VT);
7323
7324   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7325   if (N1.getOpcode() == ISD::TRUNCATE &&
7326       N1.getOperand(0).getOpcode() == ISD::AND) {
7327     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7328       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7329   }
7330
7331   // TODO - support non-uniform vector shift amounts.
7332   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7333     return SDValue(N, 0);
7334
7335   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7336   if (N0.getOpcode() == ISD::SHL) {
7337     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7338                                           ConstantSDNode *RHS) {
7339       APInt c1 = LHS->getAPIntValue();
7340       APInt c2 = RHS->getAPIntValue();
7341       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7342       return (c1 + c2).uge(OpSizeInBits);
7343     };
7344     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7345       return DAG.getConstant(0, SDLoc(N), VT);
7346
7347     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7348                                        ConstantSDNode *RHS) {
7349       APInt c1 = LHS->getAPIntValue();
7350       APInt c2 = RHS->getAPIntValue();
7351       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7352       return (c1 + c2).ult(OpSizeInBits);
7353     };
7354     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7355       SDLoc DL(N);
7356       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7357       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7358     }
7359   }
7360
7361   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7362   // For this to be valid, the second form must not preserve any of the bits
7363   // that are shifted out by the inner shift in the first form.  This means
7364   // the outer shift size must be >= the number of bits added by the ext.
7365   // As a corollary, we don't care what kind of ext it is.
7366   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7367        N0.getOpcode() == ISD::ANY_EXTEND ||
7368        N0.getOpcode() == ISD::SIGN_EXTEND) &&
7369       N0.getOperand(0).getOpcode() == ISD::SHL) {
7370     SDValue N0Op0 = N0.getOperand(0);
7371     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7372     EVT InnerVT = N0Op0.getValueType();
7373     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7374
7375     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7376                                                          ConstantSDNode *RHS) {
7377       APInt c1 = LHS->getAPIntValue();
7378       APInt c2 = RHS->getAPIntValue();
7379       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7380       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7381              (c1 + c2).uge(OpSizeInBits);
7382     };
7383     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7384                                   /*AllowUndefs*/ false,
7385                                   /*AllowTypeMismatch*/ true))
7386       return DAG.getConstant(0, SDLoc(N), VT);
7387
7388     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7389                                                       ConstantSDNode *RHS) {
7390       APInt c1 = LHS->getAPIntValue();
7391       APInt c2 = RHS->getAPIntValue();
7392       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7393       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7394              (c1 + c2).ult(OpSizeInBits);
7395     };
7396     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7397                                   /*AllowUndefs*/ false,
7398                                   /*AllowTypeMismatch*/ true)) {
7399       SDLoc DL(N);
7400       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7401       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7402       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7403       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7404     }
7405   }
7406
7407   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7408   // Only fold this if the inner zext has no other uses to avoid increasing
7409   // the total number of instructions.
7410   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7411       N0.getOperand(0).getOpcode() == ISD::SRL) {
7412     SDValue N0Op0 = N0.getOperand(0);
7413     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7414
7415     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7416       APInt c1 = LHS->getAPIntValue();
7417       APInt c2 = RHS->getAPIntValue();
7418       zeroExtendToMatch(c1, c2);
7419       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7420     };
7421     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7422                                   /*AllowUndefs*/ false,
7423                                   /*AllowTypeMismatch*/ true)) {
7424       SDLoc DL(N);
7425       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7426       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7427       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7428       AddToWorklist(NewSHL.getNode());
7429       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7430     }
7431   }
7432
7433   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
7434   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
7435   // TODO - support non-uniform vector shift amounts.
7436   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7437       N0->getFlags().hasExact()) {
7438     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7439       uint64_t C1 = N0C1->getZExtValue();
7440       uint64_t C2 = N1C->getZExtValue();
7441       SDLoc DL(N);
7442       if (C1 <= C2)
7443         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7444                            DAG.getConstant(C2 - C1, DL, ShiftVT));
7445       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7446                          DAG.getConstant(C1 - C2, DL, ShiftVT));
7447     }
7448   }
7449
7450   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7451   //                               (and (srl x, (sub c1, c2), MASK)
7452   // Only fold this if the inner shift has no other uses -- if it does, folding
7453   // this will increase the total number of instructions.
7454   // TODO - drop hasOneUse requirement if c1 == c2?
7455   // TODO - support non-uniform vector shift amounts.
7456   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7457       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7458     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7459       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7460         uint64_t c1 = N0C1->getZExtValue();
7461         uint64_t c2 = N1C->getZExtValue();
7462         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7463         SDValue Shift;
7464         if (c2 > c1) {
7465           Mask <<= c2 - c1;
7466           SDLoc DL(N);
7467           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7468                               DAG.getConstant(c2 - c1, DL, ShiftVT));
7469         } else {
7470           Mask.lshrInPlace(c1 - c2);
7471           SDLoc DL(N);
7472           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7473                               DAG.getConstant(c1 - c2, DL, ShiftVT));
7474         }
7475         SDLoc DL(N0);
7476         return DAG.getNode(ISD::AND, DL, VT, Shift,
7477                            DAG.getConstant(Mask, DL, VT));
7478       }
7479     }
7480   }
7481
7482   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7483   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7484       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7485     SDLoc DL(N);
7486     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7487     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7488     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7489   }
7490
7491   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7492   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7493   // Variant of version done on multiply, except mul by a power of 2 is turned
7494   // into a shift.
7495   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7496       N0.getNode()->hasOneUse() &&
7497       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7498       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7499       TLI.isDesirableToCommuteWithShift(N, Level)) {
7500     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7501     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7502     AddToWorklist(Shl0.getNode());
7503     AddToWorklist(Shl1.getNode());
7504     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7505   }
7506
7507   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7508   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7509       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7510       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7511     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7512     if (isConstantOrConstantVector(Shl))
7513       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7514   }
7515
7516   if (N1C && !N1C->isOpaque())
7517     if (SDValue NewSHL = visitShiftByConstant(N))
7518       return NewSHL;
7519
7520   return SDValue();
7521 }
7522
7523 SDValue DAGCombiner::visitSRA(SDNode *N) {
7524   SDValue N0 = N->getOperand(0);
7525   SDValue N1 = N->getOperand(1);
7526   if (SDValue V = DAG.simplifyShift(N0, N1))
7527     return V;
7528
7529   EVT VT = N0.getValueType();
7530   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7531
7532   // Arithmetic shifting an all-sign-bit value is a no-op.
7533   // fold (sra 0, x) -> 0
7534   // fold (sra -1, x) -> -1
7535   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7536     return N0;
7537
7538   // fold vector ops
7539   if (VT.isVector())
7540     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7541       return FoldedVOp;
7542
7543   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7544
7545   // fold (sra c1, c2) -> (sra c1, c2)
7546   // TODO - support non-uniform vector shift amounts.
7547   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7548   if (N0C && N1C && !N1C->isOpaque())
7549     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7550
7551   if (SDValue NewSel = foldBinOpIntoSelect(N))
7552     return NewSel;
7553
7554   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
7555   // sext_inreg.
7556   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7557     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7558     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7559     if (VT.isVector())
7560       ExtVT = EVT::getVectorVT(*DAG.getContext(),
7561                                ExtVT, VT.getVectorNumElements());
7562     if ((!LegalOperations ||
7563          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7564       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7565                          N0.getOperand(0), DAG.getValueType(ExtVT));
7566   }
7567
7568   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7569   // clamp (add c1, c2) to max shift.
7570   if (N0.getOpcode() == ISD::SRA) {
7571     SDLoc DL(N);
7572     EVT ShiftVT = N1.getValueType();
7573     EVT ShiftSVT = ShiftVT.getScalarType();
7574     SmallVector<SDValue, 16> ShiftValues;
7575
7576     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7577       APInt c1 = LHS->getAPIntValue();
7578       APInt c2 = RHS->getAPIntValue();
7579       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7580       APInt Sum = c1 + c2;
7581       unsigned ShiftSum =
7582           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7583       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7584       return true;
7585     };
7586     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7587       SDValue ShiftValue;
7588       if (VT.isVector())
7589         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7590       else
7591         ShiftValue = ShiftValues[0];
7592       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7593     }
7594   }
7595
7596   // fold (sra (shl X, m), (sub result_size, n))
7597   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7598   // result_size - n != m.
7599   // If truncate is free for the target sext(shl) is likely to result in better
7600   // code.
7601   if (N0.getOpcode() == ISD::SHL && N1C) {
7602     // Get the two constanst of the shifts, CN0 = m, CN = n.
7603     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7604     if (N01C) {
7605       LLVMContext &Ctx = *DAG.getContext();
7606       // Determine what the truncate's result bitsize and type would be.
7607       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7608
7609       if (VT.isVector())
7610         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7611
7612       // Determine the residual right-shift amount.
7613       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7614
7615       // If the shift is not a no-op (in which case this should be just a sign
7616       // extend already), the truncated to type is legal, sign_extend is legal
7617       // on that type, and the truncate to that type is both legal and free,
7618       // perform the transform.
7619       if ((ShiftAmt > 0) &&
7620           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7621           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7622           TLI.isTruncateFree(VT, TruncVT)) {
7623         SDLoc DL(N);
7624         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7625             getShiftAmountTy(N0.getOperand(0).getValueType()));
7626         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7627                                     N0.getOperand(0), Amt);
7628         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7629                                     Shift);
7630         return DAG.getNode(ISD::SIGN_EXTEND, DL,
7631                            N->getValueType(0), Trunc);
7632       }
7633     }
7634   }
7635
7636   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
7637   //   sra (add (shl X, N1C), AddC), N1C -->
7638   //   sext (add (trunc X to (width - N1C)), AddC')
7639   if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
7640       N0.getOperand(0).getOpcode() == ISD::SHL &&
7641       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
7642     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
7643       SDValue Shl = N0.getOperand(0);
7644       // Determine what the truncate's type would be and ask the target if that
7645       // is a free operation.
7646       LLVMContext &Ctx = *DAG.getContext();
7647       unsigned ShiftAmt = N1C->getZExtValue();
7648       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
7649       if (VT.isVector())
7650         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7651
7652       // TODO: The simple type check probably belongs in the default hook
7653       //       implementation and/or target-specific overrides (because
7654       //       non-simple types likely require masking when legalized), but that
7655       //       restriction may conflict with other transforms.
7656       if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
7657         SDLoc DL(N);
7658         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
7659         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
7660                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
7661         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
7662         return DAG.getSExtOrTrunc(Add, DL, VT);
7663       }
7664     }
7665   }
7666
7667   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7668   if (N1.getOpcode() == ISD::TRUNCATE &&
7669       N1.getOperand(0).getOpcode() == ISD::AND) {
7670     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7671       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7672   }
7673
7674   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7675   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7676   //      if c1 is equal to the number of bits the trunc removes
7677   // TODO - support non-uniform vector shift amounts.
7678   if (N0.getOpcode() == ISD::TRUNCATE &&
7679       (N0.getOperand(0).getOpcode() == ISD::SRL ||
7680        N0.getOperand(0).getOpcode() == ISD::SRA) &&
7681       N0.getOperand(0).hasOneUse() &&
7682       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7683     SDValue N0Op0 = N0.getOperand(0);
7684     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7685       EVT LargeVT = N0Op0.getValueType();
7686       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7687       if (LargeShift->getAPIntValue() == TruncBits) {
7688         SDLoc DL(N);
7689         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7690                                       getShiftAmountTy(LargeVT));
7691         SDValue SRA =
7692             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7693         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7694       }
7695     }
7696   }
7697
7698   // Simplify, based on bits shifted out of the LHS.
7699   // TODO - support non-uniform vector shift amounts.
7700   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7701     return SDValue(N, 0);
7702
7703   // If the sign bit is known to be zero, switch this to a SRL.
7704   if (DAG.SignBitIsZero(N0))
7705     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7706
7707   if (N1C && !N1C->isOpaque())
7708     if (SDValue NewSRA = visitShiftByConstant(N))
7709       return NewSRA;
7710
7711   return SDValue();
7712 }
7713
7714 SDValue DAGCombiner::visitSRL(SDNode *N) {
7715   SDValue N0 = N->getOperand(0);
7716   SDValue N1 = N->getOperand(1);
7717   if (SDValue V = DAG.simplifyShift(N0, N1))
7718     return V;
7719
7720   EVT VT = N0.getValueType();
7721   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7722
7723   // fold vector ops
7724   if (VT.isVector())
7725     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7726       return FoldedVOp;
7727
7728   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7729
7730   // fold (srl c1, c2) -> c1 >>u c2
7731   // TODO - support non-uniform vector shift amounts.
7732   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7733   if (N0C && N1C && !N1C->isOpaque())
7734     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7735
7736   if (SDValue NewSel = foldBinOpIntoSelect(N))
7737     return NewSel;
7738
7739   // if (srl x, c) is known to be zero, return 0
7740   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7741                                    APInt::getAllOnesValue(OpSizeInBits)))
7742     return DAG.getConstant(0, SDLoc(N), VT);
7743
7744   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7745   if (N0.getOpcode() == ISD::SRL) {
7746     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7747                                           ConstantSDNode *RHS) {
7748       APInt c1 = LHS->getAPIntValue();
7749       APInt c2 = RHS->getAPIntValue();
7750       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7751       return (c1 + c2).uge(OpSizeInBits);
7752     };
7753     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7754       return DAG.getConstant(0, SDLoc(N), VT);
7755
7756     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7757                                        ConstantSDNode *RHS) {
7758       APInt c1 = LHS->getAPIntValue();
7759       APInt c2 = RHS->getAPIntValue();
7760       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7761       return (c1 + c2).ult(OpSizeInBits);
7762     };
7763     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7764       SDLoc DL(N);
7765       EVT ShiftVT = N1.getValueType();
7766       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7767       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7768     }
7769   }
7770
7771   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7772   // TODO - support non-uniform vector shift amounts.
7773   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7774       N0.getOperand(0).getOpcode() == ISD::SRL) {
7775     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7776       uint64_t c1 = N001C->getZExtValue();
7777       uint64_t c2 = N1C->getZExtValue();
7778       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7779       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7780       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7781       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7782       if (c1 + OpSizeInBits == InnerShiftSize) {
7783         SDLoc DL(N0);
7784         if (c1 + c2 >= InnerShiftSize)
7785           return DAG.getConstant(0, DL, VT);
7786         return DAG.getNode(ISD::TRUNCATE, DL, VT,
7787                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7788                                        N0.getOperand(0).getOperand(0),
7789                                        DAG.getConstant(c1 + c2, DL,
7790                                                        ShiftCountVT)));
7791       }
7792     }
7793   }
7794
7795   // fold (srl (shl x, c), c) -> (and x, cst2)
7796   // TODO - (srl (shl x, c1), c2).
7797   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7798       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7799     SDLoc DL(N);
7800     SDValue Mask =
7801         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7802     AddToWorklist(Mask.getNode());
7803     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7804   }
7805
7806   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7807   // TODO - support non-uniform vector shift amounts.
7808   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7809     // Shifting in all undef bits?
7810     EVT SmallVT = N0.getOperand(0).getValueType();
7811     unsigned BitSize = SmallVT.getScalarSizeInBits();
7812     if (N1C->getAPIntValue().uge(BitSize))
7813       return DAG.getUNDEF(VT);
7814
7815     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7816       uint64_t ShiftAmt = N1C->getZExtValue();
7817       SDLoc DL0(N0);
7818       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7819                                        N0.getOperand(0),
7820                           DAG.getConstant(ShiftAmt, DL0,
7821                                           getShiftAmountTy(SmallVT)));
7822       AddToWorklist(SmallShift.getNode());
7823       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7824       SDLoc DL(N);
7825       return DAG.getNode(ISD::AND, DL, VT,
7826                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7827                          DAG.getConstant(Mask, DL, VT));
7828     }
7829   }
7830
7831   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
7832   // bit, which is unmodified by sra.
7833   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
7834     if (N0.getOpcode() == ISD::SRA)
7835       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7836   }
7837
7838   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
7839   if (N1C && N0.getOpcode() == ISD::CTLZ &&
7840       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7841     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7842
7843     // If any of the input bits are KnownOne, then the input couldn't be all
7844     // zeros, thus the result of the srl will always be zero.
7845     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7846
7847     // If all of the bits input the to ctlz node are known to be zero, then
7848     // the result of the ctlz is "32" and the result of the shift is one.
7849     APInt UnknownBits = ~Known.Zero;
7850     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7851
7852     // Otherwise, check to see if there is exactly one bit input to the ctlz.
7853     if (UnknownBits.isPowerOf2()) {
7854       // Okay, we know that only that the single bit specified by UnknownBits
7855       // could be set on input to the CTLZ node. If this bit is set, the SRL
7856       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7857       // to an SRL/XOR pair, which is likely to simplify more.
7858       unsigned ShAmt = UnknownBits.countTrailingZeros();
7859       SDValue Op = N0.getOperand(0);
7860
7861       if (ShAmt) {
7862         SDLoc DL(N0);
7863         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7864                   DAG.getConstant(ShAmt, DL,
7865                                   getShiftAmountTy(Op.getValueType())));
7866         AddToWorklist(Op.getNode());
7867       }
7868
7869       SDLoc DL(N);
7870       return DAG.getNode(ISD::XOR, DL, VT,
7871                          Op, DAG.getConstant(1, DL, VT));
7872     }
7873   }
7874
7875   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7876   if (N1.getOpcode() == ISD::TRUNCATE &&
7877       N1.getOperand(0).getOpcode() == ISD::AND) {
7878     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7879       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7880   }
7881
7882   // fold operands of srl based on knowledge that the low bits are not
7883   // demanded.
7884   // TODO - support non-uniform vector shift amounts.
7885   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7886     return SDValue(N, 0);
7887
7888   if (N1C && !N1C->isOpaque())
7889     if (SDValue NewSRL = visitShiftByConstant(N))
7890       return NewSRL;
7891
7892   // Attempt to convert a srl of a load into a narrower zero-extending load.
7893   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7894     return NarrowLoad;
7895
7896   // Here is a common situation. We want to optimize:
7897   //
7898   //   %a = ...
7899   //   %b = and i32 %a, 2
7900   //   %c = srl i32 %b, 1
7901   //   brcond i32 %c ...
7902   //
7903   // into
7904   //
7905   //   %a = ...
7906   //   %b = and %a, 2
7907   //   %c = setcc eq %b, 0
7908   //   brcond %c ...
7909   //
7910   // However when after the source operand of SRL is optimized into AND, the SRL
7911   // itself may not be optimized further. Look for it and add the BRCOND into
7912   // the worklist.
7913   if (N->hasOneUse()) {
7914     SDNode *Use = *N->use_begin();
7915     if (Use->getOpcode() == ISD::BRCOND)
7916       AddToWorklist(Use);
7917     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7918       // Also look pass the truncate.
7919       Use = *Use->use_begin();
7920       if (Use->getOpcode() == ISD::BRCOND)
7921         AddToWorklist(Use);
7922     }
7923   }
7924
7925   return SDValue();
7926 }
7927
7928 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7929   EVT VT = N->getValueType(0);
7930   SDValue N0 = N->getOperand(0);
7931   SDValue N1 = N->getOperand(1);
7932   SDValue N2 = N->getOperand(2);
7933   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7934   unsigned BitWidth = VT.getScalarSizeInBits();
7935
7936   // fold (fshl N0, N1, 0) -> N0
7937   // fold (fshr N0, N1, 0) -> N1
7938   if (isPowerOf2_32(BitWidth))
7939     if (DAG.MaskedValueIsZero(
7940             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7941       return IsFSHL ? N0 : N1;
7942
7943   auto IsUndefOrZero = [](SDValue V) {
7944     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7945   };
7946
7947   // TODO - support non-uniform vector shift amounts.
7948   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7949     EVT ShAmtTy = N2.getValueType();
7950
7951     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7952     if (Cst->getAPIntValue().uge(BitWidth)) {
7953       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7954       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7955                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7956     }
7957
7958     unsigned ShAmt = Cst->getZExtValue();
7959     if (ShAmt == 0)
7960       return IsFSHL ? N0 : N1;
7961
7962     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7963     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7964     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7965     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7966     if (IsUndefOrZero(N0))
7967       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7968                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7969                                          SDLoc(N), ShAmtTy));
7970     if (IsUndefOrZero(N1))
7971       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7972                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7973                                          SDLoc(N), ShAmtTy));
7974   }
7975
7976   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7977   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7978   // iff We know the shift amount is in range.
7979   // TODO: when is it worth doing SUB(BW, N2) as well?
7980   if (isPowerOf2_32(BitWidth)) {
7981     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7982     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7983       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7984     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7985       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7986   }
7987
7988   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7989   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7990   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7991   // is legal as well we might be better off avoiding non-constant (BW - N2).
7992   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7993   if (N0 == N1 && hasOperation(RotOpc, VT))
7994     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7995
7996   // Simplify, based on bits shifted out of N0/N1.
7997   if (SimplifyDemandedBits(SDValue(N, 0)))
7998     return SDValue(N, 0);
7999
8000   return SDValue();
8001 }
8002
8003 SDValue DAGCombiner::visitABS(SDNode *N) {
8004   SDValue N0 = N->getOperand(0);
8005   EVT VT = N->getValueType(0);
8006
8007   // fold (abs c1) -> c2
8008   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8009     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8010   // fold (abs (abs x)) -> (abs x)
8011   if (N0.getOpcode() == ISD::ABS)
8012     return N0;
8013   // fold (abs x) -> x iff not-negative
8014   if (DAG.SignBitIsZero(N0))
8015     return N0;
8016   return SDValue();
8017 }
8018
8019 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8020   SDValue N0 = N->getOperand(0);
8021   EVT VT = N->getValueType(0);
8022
8023   // fold (bswap c1) -> c2
8024   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8025     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8026   // fold (bswap (bswap x)) -> x
8027   if (N0.getOpcode() == ISD::BSWAP)
8028     return N0->getOperand(0);
8029   return SDValue();
8030 }
8031
8032 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8033   SDValue N0 = N->getOperand(0);
8034   EVT VT = N->getValueType(0);
8035
8036   // fold (bitreverse c1) -> c2
8037   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8038     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8039   // fold (bitreverse (bitreverse x)) -> x
8040   if (N0.getOpcode() == ISD::BITREVERSE)
8041     return N0.getOperand(0);
8042   return SDValue();
8043 }
8044
8045 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8046   SDValue N0 = N->getOperand(0);
8047   EVT VT = N->getValueType(0);
8048
8049   // fold (ctlz c1) -> c2
8050   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8051     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8052
8053   // If the value is known never to be zero, switch to the undef version.
8054   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8055     if (DAG.isKnownNeverZero(N0))
8056       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8057   }
8058
8059   return SDValue();
8060 }
8061
8062 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8063   SDValue N0 = N->getOperand(0);
8064   EVT VT = N->getValueType(0);
8065
8066   // fold (ctlz_zero_undef c1) -> c2
8067   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8068     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8069   return SDValue();
8070 }
8071
8072 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8073   SDValue N0 = N->getOperand(0);
8074   EVT VT = N->getValueType(0);
8075
8076   // fold (cttz c1) -> c2
8077   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8078     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8079
8080   // If the value is known never to be zero, switch to the undef version.
8081   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8082     if (DAG.isKnownNeverZero(N0))
8083       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8084   }
8085
8086   return SDValue();
8087 }
8088
8089 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8090   SDValue N0 = N->getOperand(0);
8091   EVT VT = N->getValueType(0);
8092
8093   // fold (cttz_zero_undef c1) -> c2
8094   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8095     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8096   return SDValue();
8097 }
8098
8099 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8100   SDValue N0 = N->getOperand(0);
8101   EVT VT = N->getValueType(0);
8102
8103   // fold (ctpop c1) -> c2
8104   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8105     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8106   return SDValue();
8107 }
8108
8109 // FIXME: This should be checking for no signed zeros on individual operands, as
8110 // well as no nans.
8111 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8112                                          SDValue RHS,
8113                                          const TargetLowering &TLI) {
8114   const TargetOptions &Options = DAG.getTarget().Options;
8115   EVT VT = LHS.getValueType();
8116
8117   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8118          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8119          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8120 }
8121
8122 /// Generate Min/Max node
8123 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8124                                    SDValue RHS, SDValue True, SDValue False,
8125                                    ISD::CondCode CC, const TargetLowering &TLI,
8126                                    SelectionDAG &DAG) {
8127   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8128     return SDValue();
8129
8130   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8131   switch (CC) {
8132   case ISD::SETOLT:
8133   case ISD::SETOLE:
8134   case ISD::SETLT:
8135   case ISD::SETLE:
8136   case ISD::SETULT:
8137   case ISD::SETULE: {
8138     // Since it's known never nan to get here already, either fminnum or
8139     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8140     // expanded in terms of it.
8141     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8142     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8143       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8144
8145     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8146     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8147       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8148     return SDValue();
8149   }
8150   case ISD::SETOGT:
8151   case ISD::SETOGE:
8152   case ISD::SETGT:
8153   case ISD::SETGE:
8154   case ISD::SETUGT:
8155   case ISD::SETUGE: {
8156     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8157     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8158       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8159
8160     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8161     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8162       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8163     return SDValue();
8164   }
8165   default:
8166     return SDValue();
8167   }
8168 }
8169
8170 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8171   SDValue Cond = N->getOperand(0);
8172   SDValue N1 = N->getOperand(1);
8173   SDValue N2 = N->getOperand(2);
8174   EVT VT = N->getValueType(0);
8175   EVT CondVT = Cond.getValueType();
8176   SDLoc DL(N);
8177
8178   if (!VT.isInteger())
8179     return SDValue();
8180
8181   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8182   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8183   if (!C1 || !C2)
8184     return SDValue();
8185
8186   // Only do this before legalization to avoid conflicting with target-specific
8187   // transforms in the other direction (create a select from a zext/sext). There
8188   // is also a target-independent combine here in DAGCombiner in the other
8189   // direction for (select Cond, -1, 0) when the condition is not i1.
8190   if (CondVT == MVT::i1 && !LegalOperations) {
8191     if (C1->isNullValue() && C2->isOne()) {
8192       // select Cond, 0, 1 --> zext (!Cond)
8193       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8194       if (VT != MVT::i1)
8195         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8196       return NotCond;
8197     }
8198     if (C1->isNullValue() && C2->isAllOnesValue()) {
8199       // select Cond, 0, -1 --> sext (!Cond)
8200       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8201       if (VT != MVT::i1)
8202         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8203       return NotCond;
8204     }
8205     if (C1->isOne() && C2->isNullValue()) {
8206       // select Cond, 1, 0 --> zext (Cond)
8207       if (VT != MVT::i1)
8208         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8209       return Cond;
8210     }
8211     if (C1->isAllOnesValue() && C2->isNullValue()) {
8212       // select Cond, -1, 0 --> sext (Cond)
8213       if (VT != MVT::i1)
8214         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8215       return Cond;
8216     }
8217
8218     // For any constants that differ by 1, we can transform the select into an
8219     // extend and add. Use a target hook because some targets may prefer to
8220     // transform in the other direction.
8221     if (TLI.convertSelectOfConstantsToMath(VT)) {
8222       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
8223         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8224         if (VT != MVT::i1)
8225           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8226         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8227       }
8228       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
8229         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8230         if (VT != MVT::i1)
8231           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8232         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8233       }
8234     }
8235
8236     return SDValue();
8237   }
8238
8239   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8240   // We can't do this reliably if integer based booleans have different contents
8241   // to floating point based booleans. This is because we can't tell whether we
8242   // have an integer-based boolean or a floating-point-based boolean unless we
8243   // can find the SETCC that produced it and inspect its operands. This is
8244   // fairly easy if C is the SETCC node, but it can potentially be
8245   // undiscoverable (or not reasonably discoverable). For example, it could be
8246   // in another basic block or it could require searching a complicated
8247   // expression.
8248   if (CondVT.isInteger() &&
8249       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8250           TargetLowering::ZeroOrOneBooleanContent &&
8251       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8252           TargetLowering::ZeroOrOneBooleanContent &&
8253       C1->isNullValue() && C2->isOne()) {
8254     SDValue NotCond =
8255         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8256     if (VT.bitsEq(CondVT))
8257       return NotCond;
8258     return DAG.getZExtOrTrunc(NotCond, DL, VT);
8259   }
8260
8261   return SDValue();
8262 }
8263
8264 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8265   SDValue N0 = N->getOperand(0);
8266   SDValue N1 = N->getOperand(1);
8267   SDValue N2 = N->getOperand(2);
8268   EVT VT = N->getValueType(0);
8269   EVT VT0 = N0.getValueType();
8270   SDLoc DL(N);
8271   SDNodeFlags Flags = N->getFlags();
8272
8273   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8274     return V;
8275
8276   // fold (select X, X, Y) -> (or X, Y)
8277   // fold (select X, 1, Y) -> (or C, Y)
8278   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8279     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8280
8281   if (SDValue V = foldSelectOfConstants(N))
8282     return V;
8283
8284   // fold (select C, 0, X) -> (and (not C), X)
8285   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8286     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8287     AddToWorklist(NOTNode.getNode());
8288     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8289   }
8290   // fold (select C, X, 1) -> (or (not C), X)
8291   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8292     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8293     AddToWorklist(NOTNode.getNode());
8294     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8295   }
8296   // fold (select X, Y, X) -> (and X, Y)
8297   // fold (select X, Y, 0) -> (and X, Y)
8298   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8299     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8300
8301   // If we can fold this based on the true/false value, do so.
8302   if (SimplifySelectOps(N, N1, N2))
8303     return SDValue(N, 0); // Don't revisit N.
8304
8305   if (VT0 == MVT::i1) {
8306     // The code in this block deals with the following 2 equivalences:
8307     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8308     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8309     // The target can specify its preferred form with the
8310     // shouldNormalizeToSelectSequence() callback. However we always transform
8311     // to the right anyway if we find the inner select exists in the DAG anyway
8312     // and we always transform to the left side if we know that we can further
8313     // optimize the combination of the conditions.
8314     bool normalizeToSequence =
8315         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8316     // select (and Cond0, Cond1), X, Y
8317     //   -> select Cond0, (select Cond1, X, Y), Y
8318     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8319       SDValue Cond0 = N0->getOperand(0);
8320       SDValue Cond1 = N0->getOperand(1);
8321       SDValue InnerSelect =
8322           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8323       if (normalizeToSequence || !InnerSelect.use_empty())
8324         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8325                            InnerSelect, N2, Flags);
8326       // Cleanup on failure.
8327       if (InnerSelect.use_empty())
8328         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8329     }
8330     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8331     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8332       SDValue Cond0 = N0->getOperand(0);
8333       SDValue Cond1 = N0->getOperand(1);
8334       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8335                                         Cond1, N1, N2, Flags);
8336       if (normalizeToSequence || !InnerSelect.use_empty())
8337         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8338                            InnerSelect, Flags);
8339       // Cleanup on failure.
8340       if (InnerSelect.use_empty())
8341         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8342     }
8343
8344     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8345     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8346       SDValue N1_0 = N1->getOperand(0);
8347       SDValue N1_1 = N1->getOperand(1);
8348       SDValue N1_2 = N1->getOperand(2);
8349       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8350         // Create the actual and node if we can generate good code for it.
8351         if (!normalizeToSequence) {
8352           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8353           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8354                              N2, Flags);
8355         }
8356         // Otherwise see if we can optimize the "and" to a better pattern.
8357         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8358           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8359                              N2, Flags);
8360         }
8361       }
8362     }
8363     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8364     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8365       SDValue N2_0 = N2->getOperand(0);
8366       SDValue N2_1 = N2->getOperand(1);
8367       SDValue N2_2 = N2->getOperand(2);
8368       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8369         // Create the actual or node if we can generate good code for it.
8370         if (!normalizeToSequence) {
8371           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8372           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8373                              N2_2, Flags);
8374         }
8375         // Otherwise see if we can optimize to a better pattern.
8376         if (SDValue Combined = visitORLike(N0, N2_0, N))
8377           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8378                              N2_2, Flags);
8379       }
8380     }
8381   }
8382
8383   // select (not Cond), N1, N2 -> select Cond, N2, N1
8384   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8385     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8386     SelectOp->setFlags(Flags);
8387     return SelectOp;
8388   }
8389
8390   // Fold selects based on a setcc into other things, such as min/max/abs.
8391   if (N0.getOpcode() == ISD::SETCC) {
8392     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8393     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8394
8395     // select (fcmp lt x, y), x, y -> fminnum x, y
8396     // select (fcmp gt x, y), x, y -> fmaxnum x, y
8397     //
8398     // This is OK if we don't care what happens if either operand is a NaN.
8399     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8400       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8401                                                 CC, TLI, DAG))
8402         return FMinMax;
8403
8404     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8405     // This is conservatively limited to pre-legal-operations to give targets
8406     // a chance to reverse the transform if they want to do that. Also, it is
8407     // unlikely that the pattern would be formed late, so it's probably not
8408     // worth going through the other checks.
8409     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8410         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8411         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8412       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8413       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8414       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8415         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8416         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8417         //
8418         // The IR equivalent of this transform would have this form:
8419         //   %a = add %x, C
8420         //   %c = icmp ugt %x, ~C
8421         //   %r = select %c, -1, %a
8422         //   =>
8423         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8424         //   %u0 = extractvalue %u, 0
8425         //   %u1 = extractvalue %u, 1
8426         //   %r = select %u1, -1, %u0
8427         SDVTList VTs = DAG.getVTList(VT, VT0);
8428         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8429         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8430       }
8431     }
8432
8433     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8434         (!LegalOperations &&
8435          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
8436       // Any flags available in a select/setcc fold will be on the setcc as they
8437       // migrated from fcmp
8438       Flags = N0.getNode()->getFlags();
8439       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8440                                        N2, N0.getOperand(2));
8441       SelectNode->setFlags(Flags);
8442       return SelectNode;
8443     }
8444
8445     return SimplifySelect(DL, N0, N1, N2);
8446   }
8447
8448   return SDValue();
8449 }
8450
8451 // This function assumes all the vselect's arguments are CONCAT_VECTOR
8452 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8453 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8454   SDLoc DL(N);
8455   SDValue Cond = N->getOperand(0);
8456   SDValue LHS = N->getOperand(1);
8457   SDValue RHS = N->getOperand(2);
8458   EVT VT = N->getValueType(0);
8459   int NumElems = VT.getVectorNumElements();
8460   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8461          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8462          Cond.getOpcode() == ISD::BUILD_VECTOR);
8463
8464   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8465   // binary ones here.
8466   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8467     return SDValue();
8468
8469   // We're sure we have an even number of elements due to the
8470   // concat_vectors we have as arguments to vselect.
8471   // Skip BV elements until we find one that's not an UNDEF
8472   // After we find an UNDEF element, keep looping until we get to half the
8473   // length of the BV and see if all the non-undef nodes are the same.
8474   ConstantSDNode *BottomHalf = nullptr;
8475   for (int i = 0; i < NumElems / 2; ++i) {
8476     if (Cond->getOperand(i)->isUndef())
8477       continue;
8478
8479     if (BottomHalf == nullptr)
8480       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8481     else if (Cond->getOperand(i).getNode() != BottomHalf)
8482       return SDValue();
8483   }
8484
8485   // Do the same for the second half of the BuildVector
8486   ConstantSDNode *TopHalf = nullptr;
8487   for (int i = NumElems / 2; i < NumElems; ++i) {
8488     if (Cond->getOperand(i)->isUndef())
8489       continue;
8490
8491     if (TopHalf == nullptr)
8492       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8493     else if (Cond->getOperand(i).getNode() != TopHalf)
8494       return SDValue();
8495   }
8496
8497   assert(TopHalf && BottomHalf &&
8498          "One half of the selector was all UNDEFs and the other was all the "
8499          "same value. This should have been addressed before this function.");
8500   return DAG.getNode(
8501       ISD::CONCAT_VECTORS, DL, VT,
8502       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8503       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8504 }
8505
8506 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8507   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8508   SDValue Mask = MSC->getMask();
8509   SDValue Chain = MSC->getChain();
8510   SDLoc DL(N);
8511
8512   // Zap scatters with a zero mask.
8513   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8514     return Chain;
8515
8516   return SDValue();
8517 }
8518
8519 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8520   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8521   SDValue Mask = MST->getMask();
8522   SDValue Chain = MST->getChain();
8523   SDLoc DL(N);
8524
8525   // Zap masked stores with a zero mask.
8526   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8527     return Chain;
8528
8529   return SDValue();
8530 }
8531
8532 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8533   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8534   SDValue Mask = MGT->getMask();
8535   SDLoc DL(N);
8536
8537   // Zap gathers with a zero mask.
8538   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8539     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8540
8541   return SDValue();
8542 }
8543
8544 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8545   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8546   SDValue Mask = MLD->getMask();
8547   SDLoc DL(N);
8548
8549   // Zap masked loads with a zero mask.
8550   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8551     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8552
8553   return SDValue();
8554 }
8555
8556 /// A vector select of 2 constant vectors can be simplified to math/logic to
8557 /// avoid a variable select instruction and possibly avoid constant loads.
8558 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8559   SDValue Cond = N->getOperand(0);
8560   SDValue N1 = N->getOperand(1);
8561   SDValue N2 = N->getOperand(2);
8562   EVT VT = N->getValueType(0);
8563   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8564       !TLI.convertSelectOfConstantsToMath(VT) ||
8565       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8566       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8567     return SDValue();
8568
8569   // Check if we can use the condition value to increment/decrement a single
8570   // constant value. This simplifies a select to an add and removes a constant
8571   // load/materialization from the general case.
8572   bool AllAddOne = true;
8573   bool AllSubOne = true;
8574   unsigned Elts = VT.getVectorNumElements();
8575   for (unsigned i = 0; i != Elts; ++i) {
8576     SDValue N1Elt = N1.getOperand(i);
8577     SDValue N2Elt = N2.getOperand(i);
8578     if (N1Elt.isUndef() || N2Elt.isUndef())
8579       continue;
8580
8581     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8582     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8583     if (C1 != C2 + 1)
8584       AllAddOne = false;
8585     if (C1 != C2 - 1)
8586       AllSubOne = false;
8587   }
8588
8589   // Further simplifications for the extra-special cases where the constants are
8590   // all 0 or all -1 should be implemented as folds of these patterns.
8591   SDLoc DL(N);
8592   if (AllAddOne || AllSubOne) {
8593     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8594     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8595     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8596     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8597     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8598   }
8599
8600   // The general case for select-of-constants:
8601   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8602   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8603   // leave that to a machine-specific pass.
8604   return SDValue();
8605 }
8606
8607 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8608   SDValue N0 = N->getOperand(0);
8609   SDValue N1 = N->getOperand(1);
8610   SDValue N2 = N->getOperand(2);
8611   EVT VT = N->getValueType(0);
8612   SDLoc DL(N);
8613
8614   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8615     return V;
8616
8617   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8618   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8619     return DAG.getSelect(DL, VT, F, N2, N1);
8620
8621   // Canonicalize integer abs.
8622   // vselect (setg[te] X,  0),  X, -X ->
8623   // vselect (setgt    X, -1),  X, -X ->
8624   // vselect (setl[te] X,  0), -X,  X ->
8625   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8626   if (N0.getOpcode() == ISD::SETCC) {
8627     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8628     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8629     bool isAbs = false;
8630     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8631
8632     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8633          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8634         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8635       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8636     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8637              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8638       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8639
8640     if (isAbs) {
8641       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8642         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8643
8644       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
8645                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
8646                                                   DL, getShiftAmountTy(VT)));
8647       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8648       AddToWorklist(Shift.getNode());
8649       AddToWorklist(Add.getNode());
8650       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8651     }
8652
8653     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8654     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8655     //
8656     // This is OK if we don't care about what happens if either operand is a
8657     // NaN.
8658     //
8659     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
8660       if (SDValue FMinMax =
8661               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
8662         return FMinMax;
8663     }
8664
8665     // If this select has a condition (setcc) with narrower operands than the
8666     // select, try to widen the compare to match the select width.
8667     // TODO: This should be extended to handle any constant.
8668     // TODO: This could be extended to handle non-loading patterns, but that
8669     //       requires thorough testing to avoid regressions.
8670     if (isNullOrNullSplat(RHS)) {
8671       EVT NarrowVT = LHS.getValueType();
8672       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8673       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8674       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8675       unsigned WideWidth = WideVT.getScalarSizeInBits();
8676       bool IsSigned = isSignedIntSetCC(CC);
8677       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8678       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8679           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8680           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8681           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8682         // Both compare operands can be widened for free. The LHS can use an
8683         // extended load, and the RHS is a constant:
8684         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8685         //   vselect (setcc extload(X), C'), N1, N2
8686         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8687         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8688         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8689         EVT WideSetCCVT = getSetCCResultType(WideVT);
8690         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8691         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8692       }
8693     }
8694   }
8695
8696   if (SimplifySelectOps(N, N1, N2))
8697     return SDValue(N, 0);  // Don't revisit N.
8698
8699   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8700   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8701     return N1;
8702   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8703   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8704     return N2;
8705
8706   // The ConvertSelectToConcatVector function is assuming both the above
8707   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8708   // and addressed.
8709   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8710       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8711       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8712     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8713       return CV;
8714   }
8715
8716   if (SDValue V = foldVSelectOfConstants(N))
8717     return V;
8718
8719   return SDValue();
8720 }
8721
8722 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8723   SDValue N0 = N->getOperand(0);
8724   SDValue N1 = N->getOperand(1);
8725   SDValue N2 = N->getOperand(2);
8726   SDValue N3 = N->getOperand(3);
8727   SDValue N4 = N->getOperand(4);
8728   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8729
8730   // fold select_cc lhs, rhs, x, x, cc -> x
8731   if (N2 == N3)
8732     return N2;
8733
8734   // Determine if the condition we're dealing with is constant
8735   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8736                                   CC, SDLoc(N), false)) {
8737     AddToWorklist(SCC.getNode());
8738
8739     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8740       if (!SCCC->isNullValue())
8741         return N2;    // cond always true -> true val
8742       else
8743         return N3;    // cond always false -> false val
8744     } else if (SCC->isUndef()) {
8745       // When the condition is UNDEF, just return the first operand. This is
8746       // coherent the DAG creation, no setcc node is created in this case
8747       return N2;
8748     } else if (SCC.getOpcode() == ISD::SETCC) {
8749       // Fold to a simpler select_cc
8750       SDValue SelectOp = DAG.getNode(
8751           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8752           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8753       SelectOp->setFlags(SCC->getFlags());
8754       return SelectOp;
8755     }
8756   }
8757
8758   // If we can fold this based on the true/false value, do so.
8759   if (SimplifySelectOps(N, N2, N3))
8760     return SDValue(N, 0);  // Don't revisit N.
8761
8762   // fold select_cc into other things, such as min/max/abs
8763   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8764 }
8765
8766 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8767   // setcc is very commonly used as an argument to brcond. This pattern
8768   // also lend itself to numerous combines and, as a result, it is desired
8769   // we keep the argument to a brcond as a setcc as much as possible.
8770   bool PreferSetCC =
8771       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8772
8773   SDValue Combined = SimplifySetCC(
8774       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8775       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8776
8777   if (!Combined)
8778     return SDValue();
8779
8780   // If we prefer to have a setcc, and we don't, we'll try our best to
8781   // recreate one using rebuildSetCC.
8782   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8783     SDValue NewSetCC = rebuildSetCC(Combined);
8784
8785     // We don't have anything interesting to combine to.
8786     if (NewSetCC.getNode() == N)
8787       return SDValue();
8788
8789     if (NewSetCC)
8790       return NewSetCC;
8791   }
8792
8793   return Combined;
8794 }
8795
8796 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8797   SDValue LHS = N->getOperand(0);
8798   SDValue RHS = N->getOperand(1);
8799   SDValue Carry = N->getOperand(2);
8800   SDValue Cond = N->getOperand(3);
8801
8802   // If Carry is false, fold to a regular SETCC.
8803   if (isNullConstant(Carry))
8804     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8805
8806   return SDValue();
8807 }
8808
8809 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8810 /// a build_vector of constants.
8811 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8812 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8813 /// Vector extends are not folded if operations are legal; this is to
8814 /// avoid introducing illegal build_vector dag nodes.
8815 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8816                                          SelectionDAG &DAG, bool LegalTypes) {
8817   unsigned Opcode = N->getOpcode();
8818   SDValue N0 = N->getOperand(0);
8819   EVT VT = N->getValueType(0);
8820   SDLoc DL(N);
8821
8822   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8823          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8824          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8825          && "Expected EXTEND dag node in input!");
8826
8827   // fold (sext c1) -> c1
8828   // fold (zext c1) -> c1
8829   // fold (aext c1) -> c1
8830   if (isa<ConstantSDNode>(N0))
8831     return DAG.getNode(Opcode, DL, VT, N0);
8832
8833   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
8834   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
8835   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
8836   if (N0->getOpcode() == ISD::SELECT) {
8837     SDValue Op1 = N0->getOperand(1);
8838     SDValue Op2 = N0->getOperand(2);
8839     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
8840         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
8841       // For any_extend, choose sign extension of the constants to allow a
8842       // possible further transform to sign_extend_inreg.i.e.
8843       //
8844       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
8845       // t2: i64 = any_extend t1
8846       // -->
8847       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
8848       // -->
8849       // t4: i64 = sign_extend_inreg t3
8850       unsigned FoldOpc = Opcode;
8851       if (FoldOpc == ISD::ANY_EXTEND)
8852         FoldOpc = ISD::SIGN_EXTEND;
8853       return DAG.getSelect(DL, VT, N0->getOperand(0),
8854                            DAG.getNode(FoldOpc, DL, VT, Op1),
8855                            DAG.getNode(FoldOpc, DL, VT, Op2));
8856     }
8857   }
8858
8859   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8860   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8861   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8862   EVT SVT = VT.getScalarType();
8863   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8864       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8865     return SDValue();
8866
8867   // We can fold this node into a build_vector.
8868   unsigned VTBits = SVT.getSizeInBits();
8869   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8870   SmallVector<SDValue, 8> Elts;
8871   unsigned NumElts = VT.getVectorNumElements();
8872
8873   // For zero-extensions, UNDEF elements still guarantee to have the upper
8874   // bits set to zero.
8875   bool IsZext =
8876       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8877
8878   for (unsigned i = 0; i != NumElts; ++i) {
8879     SDValue Op = N0.getOperand(i);
8880     if (Op.isUndef()) {
8881       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8882       continue;
8883     }
8884
8885     SDLoc DL(Op);
8886     // Get the constant value and if needed trunc it to the size of the type.
8887     // Nodes like build_vector might have constants wider than the scalar type.
8888     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8889     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8890       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8891     else
8892       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8893   }
8894
8895   return DAG.getBuildVector(VT, DL, Elts);
8896 }
8897
8898 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8899 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8900 // transformation. Returns true if extension are possible and the above
8901 // mentioned transformation is profitable.
8902 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8903                                     unsigned ExtOpc,
8904                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8905                                     const TargetLowering &TLI) {
8906   bool HasCopyToRegUses = false;
8907   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8908   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8909                             UE = N0.getNode()->use_end();
8910        UI != UE; ++UI) {
8911     SDNode *User = *UI;
8912     if (User == N)
8913       continue;
8914     if (UI.getUse().getResNo() != N0.getResNo())
8915       continue;
8916     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8917     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8918       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8919       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8920         // Sign bits will be lost after a zext.
8921         return false;
8922       bool Add = false;
8923       for (unsigned i = 0; i != 2; ++i) {
8924         SDValue UseOp = User->getOperand(i);
8925         if (UseOp == N0)
8926           continue;
8927         if (!isa<ConstantSDNode>(UseOp))
8928           return false;
8929         Add = true;
8930       }
8931       if (Add)
8932         ExtendNodes.push_back(User);
8933       continue;
8934     }
8935     // If truncates aren't free and there are users we can't
8936     // extend, it isn't worthwhile.
8937     if (!isTruncFree)
8938       return false;
8939     // Remember if this value is live-out.
8940     if (User->getOpcode() == ISD::CopyToReg)
8941       HasCopyToRegUses = true;
8942   }
8943
8944   if (HasCopyToRegUses) {
8945     bool BothLiveOut = false;
8946     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8947          UI != UE; ++UI) {
8948       SDUse &Use = UI.getUse();
8949       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8950         BothLiveOut = true;
8951         break;
8952       }
8953     }
8954     if (BothLiveOut)
8955       // Both unextended and extended values are live out. There had better be
8956       // a good reason for the transformation.
8957       return ExtendNodes.size();
8958   }
8959   return true;
8960 }
8961
8962 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8963                                   SDValue OrigLoad, SDValue ExtLoad,
8964                                   ISD::NodeType ExtType) {
8965   // Extend SetCC uses if necessary.
8966   SDLoc DL(ExtLoad);
8967   for (SDNode *SetCC : SetCCs) {
8968     SmallVector<SDValue, 4> Ops;
8969
8970     for (unsigned j = 0; j != 2; ++j) {
8971       SDValue SOp = SetCC->getOperand(j);
8972       if (SOp == OrigLoad)
8973         Ops.push_back(ExtLoad);
8974       else
8975         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8976     }
8977
8978     Ops.push_back(SetCC->getOperand(2));
8979     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8980   }
8981 }
8982
8983 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8984 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8985   SDValue N0 = N->getOperand(0);
8986   EVT DstVT = N->getValueType(0);
8987   EVT SrcVT = N0.getValueType();
8988
8989   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8990           N->getOpcode() == ISD::ZERO_EXTEND) &&
8991          "Unexpected node type (not an extend)!");
8992
8993   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8994   // For example, on a target with legal v4i32, but illegal v8i32, turn:
8995   //   (v8i32 (sext (v8i16 (load x))))
8996   // into:
8997   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
8998   //                          (v4i32 (sextload (x + 16)))))
8999   // Where uses of the original load, i.e.:
9000   //   (v8i16 (load x))
9001   // are replaced with:
9002   //   (v8i16 (truncate
9003   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
9004   //                            (v4i32 (sextload (x + 16)))))))
9005   //
9006   // This combine is only applicable to illegal, but splittable, vectors.
9007   // All legal types, and illegal non-vector types, are handled elsewhere.
9008   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9009   //
9010   if (N0->getOpcode() != ISD::LOAD)
9011     return SDValue();
9012
9013   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9014
9015   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9016       !N0.hasOneUse() || !LN0->isSimple() ||
9017       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9018       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9019     return SDValue();
9020
9021   SmallVector<SDNode *, 4> SetCCs;
9022   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9023     return SDValue();
9024
9025   ISD::LoadExtType ExtType =
9026       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9027
9028   // Try to split the vector types to get down to legal types.
9029   EVT SplitSrcVT = SrcVT;
9030   EVT SplitDstVT = DstVT;
9031   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9032          SplitSrcVT.getVectorNumElements() > 1) {
9033     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9034     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9035   }
9036
9037   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9038     return SDValue();
9039
9040   SDLoc DL(N);
9041   const unsigned NumSplits =
9042       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9043   const unsigned Stride = SplitSrcVT.getStoreSize();
9044   SmallVector<SDValue, 4> Loads;
9045   SmallVector<SDValue, 4> Chains;
9046
9047   SDValue BasePtr = LN0->getBasePtr();
9048   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9049     const unsigned Offset = Idx * Stride;
9050     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9051
9052     SDValue SplitLoad = DAG.getExtLoad(
9053         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9054         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9055         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9056
9057     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9058                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
9059
9060     Loads.push_back(SplitLoad.getValue(0));
9061     Chains.push_back(SplitLoad.getValue(1));
9062   }
9063
9064   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9065   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9066
9067   // Simplify TF.
9068   AddToWorklist(NewChain.getNode());
9069
9070   CombineTo(N, NewValue);
9071
9072   // Replace uses of the original load (before extension)
9073   // with a truncate of the concatenated sextloaded vectors.
9074   SDValue Trunc =
9075       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9076   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9077   CombineTo(N0.getNode(), Trunc, NewChain);
9078   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9079 }
9080
9081 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9082 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9083 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9084   assert(N->getOpcode() == ISD::ZERO_EXTEND);
9085   EVT VT = N->getValueType(0);
9086   EVT OrigVT = N->getOperand(0).getValueType();
9087   if (TLI.isZExtFree(OrigVT, VT))
9088     return SDValue();
9089
9090   // and/or/xor
9091   SDValue N0 = N->getOperand(0);
9092   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9093         N0.getOpcode() == ISD::XOR) ||
9094       N0.getOperand(1).getOpcode() != ISD::Constant ||
9095       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9096     return SDValue();
9097
9098   // shl/shr
9099   SDValue N1 = N0->getOperand(0);
9100   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9101       N1.getOperand(1).getOpcode() != ISD::Constant ||
9102       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9103     return SDValue();
9104
9105   // load
9106   if (!isa<LoadSDNode>(N1.getOperand(0)))
9107     return SDValue();
9108   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9109   EVT MemVT = Load->getMemoryVT();
9110   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9111       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9112     return SDValue();
9113
9114
9115   // If the shift op is SHL, the logic op must be AND, otherwise the result
9116   // will be wrong.
9117   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9118     return SDValue();
9119
9120   if (!N0.hasOneUse() || !N1.hasOneUse())
9121     return SDValue();
9122
9123   SmallVector<SDNode*, 4> SetCCs;
9124   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9125                                ISD::ZERO_EXTEND, SetCCs, TLI))
9126     return SDValue();
9127
9128   // Actually do the transformation.
9129   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9130                                    Load->getChain(), Load->getBasePtr(),
9131                                    Load->getMemoryVT(), Load->getMemOperand());
9132
9133   SDLoc DL1(N1);
9134   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9135                               N1.getOperand(1));
9136
9137   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9138   Mask = Mask.zext(VT.getSizeInBits());
9139   SDLoc DL0(N0);
9140   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9141                             DAG.getConstant(Mask, DL0, VT));
9142
9143   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9144   CombineTo(N, And);
9145   if (SDValue(Load, 0).hasOneUse()) {
9146     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9147   } else {
9148     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9149                                 Load->getValueType(0), ExtLoad);
9150     CombineTo(Load, Trunc, ExtLoad.getValue(1));
9151   }
9152
9153   // N0 is dead at this point.
9154   recursivelyDeleteUnusedNodes(N0.getNode());
9155
9156   return SDValue(N,0); // Return N so it doesn't get rechecked!
9157 }
9158
9159 /// If we're narrowing or widening the result of a vector select and the final
9160 /// size is the same size as a setcc (compare) feeding the select, then try to
9161 /// apply the cast operation to the select's operands because matching vector
9162 /// sizes for a select condition and other operands should be more efficient.
9163 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9164   unsigned CastOpcode = Cast->getOpcode();
9165   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9166           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9167           CastOpcode == ISD::FP_ROUND) &&
9168          "Unexpected opcode for vector select narrowing/widening");
9169
9170   // We only do this transform before legal ops because the pattern may be
9171   // obfuscated by target-specific operations after legalization. Do not create
9172   // an illegal select op, however, because that may be difficult to lower.
9173   EVT VT = Cast->getValueType(0);
9174   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9175     return SDValue();
9176
9177   SDValue VSel = Cast->getOperand(0);
9178   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9179       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9180     return SDValue();
9181
9182   // Does the setcc have the same vector size as the casted select?
9183   SDValue SetCC = VSel.getOperand(0);
9184   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9185   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9186     return SDValue();
9187
9188   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9189   SDValue A = VSel.getOperand(1);
9190   SDValue B = VSel.getOperand(2);
9191   SDValue CastA, CastB;
9192   SDLoc DL(Cast);
9193   if (CastOpcode == ISD::FP_ROUND) {
9194     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9195     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9196     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9197   } else {
9198     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9199     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9200   }
9201   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9202 }
9203
9204 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9205 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9206 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9207                                      const TargetLowering &TLI, EVT VT,
9208                                      bool LegalOperations, SDNode *N,
9209                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
9210   SDNode *N0Node = N0.getNode();
9211   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9212                                                    : ISD::isZEXTLoad(N0Node);
9213   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9214       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9215     return SDValue();
9216
9217   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9218   EVT MemVT = LN0->getMemoryVT();
9219   if ((LegalOperations || !LN0->isSimple() ||
9220        VT.isVector()) &&
9221       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9222     return SDValue();
9223
9224   SDValue ExtLoad =
9225       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9226                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9227   Combiner.CombineTo(N, ExtLoad);
9228   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9229   if (LN0->use_empty())
9230     Combiner.recursivelyDeleteUnusedNodes(LN0);
9231   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9232 }
9233
9234 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9235 // Only generate vector extloads when 1) they're legal, and 2) they are
9236 // deemed desirable by the target.
9237 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9238                                   const TargetLowering &TLI, EVT VT,
9239                                   bool LegalOperations, SDNode *N, SDValue N0,
9240                                   ISD::LoadExtType ExtLoadType,
9241                                   ISD::NodeType ExtOpc) {
9242   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9243       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9244       ((LegalOperations || VT.isVector() ||
9245         !cast<LoadSDNode>(N0)->isSimple()) &&
9246        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9247     return {};
9248
9249   bool DoXform = true;
9250   SmallVector<SDNode *, 4> SetCCs;
9251   if (!N0.hasOneUse())
9252     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9253   if (VT.isVector())
9254     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9255   if (!DoXform)
9256     return {};
9257
9258   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9259   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9260                                    LN0->getBasePtr(), N0.getValueType(),
9261                                    LN0->getMemOperand());
9262   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9263   // If the load value is used only by N, replace it via CombineTo N.
9264   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9265   Combiner.CombineTo(N, ExtLoad);
9266   if (NoReplaceTrunc) {
9267     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9268     Combiner.recursivelyDeleteUnusedNodes(LN0);
9269   } else {
9270     SDValue Trunc =
9271         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9272     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9273   }
9274   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9275 }
9276
9277 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9278                                        bool LegalOperations) {
9279   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9280           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9281
9282   SDValue SetCC = N->getOperand(0);
9283   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9284       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9285     return SDValue();
9286
9287   SDValue X = SetCC.getOperand(0);
9288   SDValue Ones = SetCC.getOperand(1);
9289   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9290   EVT VT = N->getValueType(0);
9291   EVT XVT = X.getValueType();
9292   // setge X, C is canonicalized to setgt, so we do not need to match that
9293   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9294   // not require the 'not' op.
9295   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9296     // Invert and smear/shift the sign bit:
9297     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9298     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9299     SDLoc DL(N);
9300     SDValue NotX = DAG.getNOT(DL, X, VT);
9301     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9302     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9303     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9304   }
9305   return SDValue();
9306 }
9307
/// Combine a SIGN_EXTEND node.
///
/// The folds below are tried in order and the first one that applies wins.
/// \returns the replacement value; SDValue(N, 0) when the DAG was already
/// updated in place through CombineTo; or an empty SDValue when no fold
/// applies.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // Try the constant-operand fold shared by the extend visitors first.
  if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
    return Res;

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Grab the truncate's operand before CombineTo possibly deletes the
      // truncate.
      SDNode *oye = N0.getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    // Op is the pre-truncate value, Mid the truncated type and Dest the type
    // of this sign extend; all sizes are scalar bit widths.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getScalarValueSizeInBits();
    unsigned MidBits  = N0.getScalarValueSizeInBits();
    unsigned DestBits = VT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      // First bring Op to the destination type; the bits above the truncated
      // width are then rewritten by the SIGN_EXTEND_INREG.
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // Try to simplify (sext (load x)).
  if (SDValue foldedExt =
          tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
                             ISD::SEXTLOAD, ISD::SIGN_EXTEND))
    return foldedExt;

  // fold (sext (load x)) to multiple smaller sextloads.
  // Only on illegal but splittable vectors.
  if (SDValue ExtLoad = CombineExtLoad(N))
    return ExtLoad;

  // Try to simplify (sext (sextload x)).
  if (SDValue foldedExt = tryToFoldExtOfExtload(
          DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
    return foldedExt;

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the last clause requires both "before operation
  // legalization" AND a legal logic op in VT, whereas other folds in this file
  // use the form "!LegalOperations || isOperationLegal(...)". Confirm that the
  // '&&' is intentional.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
    EVT MemVT = LN00->getMemoryVT();
    // The load must be unindexed, must not already be zero-extending, and the
    // sextload to VT must be legal.
    if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
      LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
      SmallVector<SDNode*, 4> SetCCs;
      // The logic op N0 plays the role of the "extend" here: it is the user of
      // the load that gets skipped while the other users are inspected.
      bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
                                             ISD::SIGN_EXTEND, SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
                                         LN00->getChain(), LN00->getBasePtr(),
                                         LN00->getMemoryVT(),
                                         LN00->getMemOperand());
        // Sign-extend the constant operand to the wide type as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
                                  ExtLoad, DAG.getConstant(Mask, DL, VT));
        ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
        // Both use counts are sampled before N is replaced below.
        // NOTE(review): despite its name, NoReplaceTruncAnd is true when N0
        // has users besides N, i.e. exactly when the truncate replacement of
        // N0 IS performed.
        bool NoReplaceTruncAnd = !N0.hasOneUse();
        bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
        CombineTo(N, And);
        // If N0 has multiple uses, change other uses as well.
        if (NoReplaceTruncAnd) {
          SDValue TruncAnd =
              DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
          CombineTo(N0.getNode(), TruncAnd);
        }
        if (NoReplaceTrunc) {
          // Only the chain result of the old load needs to be redirected.
          DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
        } else {
          // Other users of the loaded value get a truncate of the wide load.
          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
                                      LN00->getValueType(0), ExtLoad);
          CombineTo(LN00, Trunc, ExtLoad.getValue(1));
        }
        return SDValue(N,0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
  if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
    return V;

  if (N0.getOpcode() == ISD::SETCC) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    EVT N00VT = N0.getOperand(0).getValueType();

    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N00VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N00VT);

      // If we already have the desired type, don't change it.
      if (SVT != N0.getValueType()) {
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        if (VT.getSizeInBits() == SVT.getSizeInBits())
          return DAG.getSetCC(DL, VT, N00, N01, CC);

        // If the desired elements are smaller or larger than the source
        // elements, we can use a matching integer vector type and then
        // truncate/sign extend.
        EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
        if (SVT == MatchingVecType) {
          SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
          return DAG.getSExtOrTrunc(VsetCC, DL, VT);
        }
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
    // Here, T can be 1 or -1, depending on the type of the setcc and
    // getBooleanContents().
    unsigned SetCCWidth = N0.getScalarValueSizeInBits();

    // To determine the "true" side of the select, we need to know the high bit
    // of the value returned by the setcc if it evaluates to true.
    // If the type of the setcc is i1, then the true case of the select is just
    // sext(i1 1), that is, -1.
    // If the type of the setcc is larger (say, i8) then the value of the high
    // bit depends on getBooleanContents(), so ask TLI for a real "true" value
    // of the appropriate width.
    SDValue ExtTrueVal = (SetCCWidth == 1)
                             ? DAG.getAllOnesConstant(DL, VT)
                             : DAG.getBoolConstant(true, DL, VT, N00VT);
    SDValue Zero = DAG.getConstant(0, DL, VT);
    // Give the select_cc simplifier a chance before forming setcc + select.
    if (SDValue SCC =
            SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
      return SCC;

    if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
      EVT SetCCVT = getSetCCResultType(N00VT);
      // Don't do this transform for i1 because there's a select transform
      // that would reverse it.
      // TODO: We should not do this transform at all without a target hook
      // because a sext is likely cheaper than a select?
      if (SetCCVT.getScalarSizeInBits() != 1 &&
          (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
        SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
        return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);

  // sext (vsel (setcc X), A, B) --> vsel (setcc X), (sext A), (sext B)
  if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
    return NewVSel;

  // Eliminate this sign extend by doing a negation in the destination type:
  // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
  if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
      isNullOrNullSplat(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
  }
  // Eliminate this sign extend by doing a decrement in the destination type:
  // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
  if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
      isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
    SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
    return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
  }

  return SDValue();
}
9535
9536 // isTruncateOf - If N is a truncate of some other value, return true, record
9537 // the value being truncated in Op and which of Op's bits are zero/one in Known.
9538 // This function computes KnownBits to avoid a duplicated call to
9539 // computeKnownBits in the caller.
9540 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9541                          KnownBits &Known) {
9542   if (N->getOpcode() == ISD::TRUNCATE) {
9543     Op = N->getOperand(0);
9544     Known = DAG.computeKnownBits(Op);
9545     return true;
9546   }
9547
9548   if (N.getOpcode() != ISD::SETCC ||
9549       N.getValueType().getScalarType() != MVT::i1 ||
9550       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9551     return false;
9552
9553   SDValue Op0 = N->getOperand(0);
9554   SDValue Op1 = N->getOperand(1);
9555   assert(Op0.getValueType() == Op1.getValueType());
9556
9557   if (isNullOrNullSplat(Op0))
9558     Op = Op1;
9559   else if (isNullOrNullSplat(Op1))
9560     Op = Op0;
9561   else
9562     return false;
9563
9564   Known = DAG.computeKnownBits(Op);
9565
9566   return (Known.Zero | 1).isAllOnesValue();
9567 }
9568
9569 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9570   SDValue N0 = N->getOperand(0);
9571   EVT VT = N->getValueType(0);
9572
9573   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9574     return Res;
9575
9576   // fold (zext (zext x)) -> (zext x)
9577   // fold (zext (aext x)) -> (zext x)
9578   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9579     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9580                        N0.getOperand(0));
9581
9582   // fold (zext (truncate x)) -> (zext x) or
9583   //      (zext (truncate x)) -> (truncate x)
9584   // This is valid when the truncated bits of x are already zero.
9585   SDValue Op;
9586   KnownBits Known;
9587   if (isTruncateOf(DAG, N0, Op, Known)) {
9588     APInt TruncatedBits =
9589       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9590       APInt(Op.getScalarValueSizeInBits(), 0) :
9591       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9592                         N0.getScalarValueSizeInBits(),
9593                         std::min(Op.getScalarValueSizeInBits(),
9594                                  VT.getScalarSizeInBits()));
9595     if (TruncatedBits.isSubsetOf(Known.Zero))
9596       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9597   }
9598
9599   // fold (zext (truncate x)) -> (and x, mask)
9600   if (N0.getOpcode() == ISD::TRUNCATE) {
9601     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9602     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9603     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9604       SDNode *oye = N0.getOperand(0).getNode();
9605       if (NarrowLoad.getNode() != N0.getNode()) {
9606         CombineTo(N0.getNode(), NarrowLoad);
9607         // CombineTo deleted the truncate, if needed, but not what's under it.
9608         AddToWorklist(oye);
9609       }
9610       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9611     }
9612
9613     EVT SrcVT = N0.getOperand(0).getValueType();
9614     EVT MinVT = N0.getValueType();
9615
9616     // Try to mask before the extension to avoid having to generate a larger mask,
9617     // possibly over several sub-vectors.
9618     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9619       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9620                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9621         SDValue Op = N0.getOperand(0);
9622         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9623         AddToWorklist(Op.getNode());
9624         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9625         // Transfer the debug info; the new node is equivalent to N0.
9626         DAG.transferDbgValues(N0, ZExtOrTrunc);
9627         return ZExtOrTrunc;
9628       }
9629     }
9630
9631     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9632       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9633       AddToWorklist(Op.getNode());
9634       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9635       // We may safely transfer the debug info describing the truncate node over
9636       // to the equivalent and operation.
9637       DAG.transferDbgValues(N0, And);
9638       return And;
9639     }
9640   }
9641
9642   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9643   // if either of the casts is not free.
9644   if (N0.getOpcode() == ISD::AND &&
9645       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9646       N0.getOperand(1).getOpcode() == ISD::Constant &&
9647       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9648                            N0.getValueType()) ||
9649        !TLI.isZExtFree(N0.getValueType(), VT))) {
9650     SDValue X = N0.getOperand(0).getOperand(0);
9651     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9652     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9653     Mask = Mask.zext(VT.getSizeInBits());
9654     SDLoc DL(N);
9655     return DAG.getNode(ISD::AND, DL, VT,
9656                        X, DAG.getConstant(Mask, DL, VT));
9657   }
9658
9659   // Try to simplify (zext (load x)).
9660   if (SDValue foldedExt =
9661           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9662                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9663     return foldedExt;
9664
9665   // fold (zext (load x)) to multiple smaller zextloads.
9666   // Only on illegal but splittable vectors.
9667   if (SDValue ExtLoad = CombineExtLoad(N))
9668     return ExtLoad;
9669
9670   // fold (zext (and/or/xor (load x), cst)) ->
9671   //      (and/or/xor (zextload x), (zext cst))
9672   // Unless (and (load x) cst) will match as a zextload already and has
9673   // additional users.
9674   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9675        N0.getOpcode() == ISD::XOR) &&
9676       isa<LoadSDNode>(N0.getOperand(0)) &&
9677       N0.getOperand(1).getOpcode() == ISD::Constant &&
9678       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9679     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9680     EVT MemVT = LN00->getMemoryVT();
9681     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9682         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9683       bool DoXform = true;
9684       SmallVector<SDNode*, 4> SetCCs;
9685       if (!N0.hasOneUse()) {
9686         if (N0.getOpcode() == ISD::AND) {
9687           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9688           EVT LoadResultTy = AndC->getValueType(0);
9689           EVT ExtVT;
9690           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9691             DoXform = false;
9692         }
9693       }
9694       if (DoXform)
9695         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9696                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9697       if (DoXform) {
9698         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9699                                          LN00->getChain(), LN00->getBasePtr(),
9700                                          LN00->getMemoryVT(),
9701                                          LN00->getMemOperand());
9702         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9703         Mask = Mask.zext(VT.getSizeInBits());
9704         SDLoc DL(N);
9705         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9706                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9707         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9708         bool NoReplaceTruncAnd = !N0.hasOneUse();
9709         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9710         CombineTo(N, And);
9711         // If N0 has multiple uses, change other uses as well.
9712         if (NoReplaceTruncAnd) {
9713           SDValue TruncAnd =
9714               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9715           CombineTo(N0.getNode(), TruncAnd);
9716         }
9717         if (NoReplaceTrunc) {
9718           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9719         } else {
9720           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9721                                       LN00->getValueType(0), ExtLoad);
9722           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9723         }
9724         return SDValue(N,0); // Return N so it doesn't get rechecked!
9725       }
9726     }
9727   }
9728
9729   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9730   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9731   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9732     return ZExtLoad;
9733
9734   // Try to simplify (zext (zextload x)).
9735   if (SDValue foldedExt = tryToFoldExtOfExtload(
9736           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9737     return foldedExt;
9738
9739   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9740     return V;
9741
9742   if (N0.getOpcode() == ISD::SETCC) {
9743     // Only do this before legalize for now.
9744     if (!LegalOperations && VT.isVector() &&
9745         N0.getValueType().getVectorElementType() == MVT::i1) {
9746       EVT N00VT = N0.getOperand(0).getValueType();
9747       if (getSetCCResultType(N00VT) == N0.getValueType())
9748         return SDValue();
9749
9750       // We know that the # elements of the results is the same as the #
9751       // elements of the compare (and the # elements of the compare result for
9752       // that matter). Check to see that they are the same size. If so, we know
9753       // that the element size of the sext'd result matches the element size of
9754       // the compare operands.
9755       SDLoc DL(N);
9756       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9757       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9758         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9759         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9760                                      N0.getOperand(1), N0.getOperand(2));
9761         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9762       }
9763
9764       // If the desired elements are smaller or larger than the source
9765       // elements we can use a matching integer vector type and then
9766       // truncate/sign extend.
9767       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9768       SDValue VsetCC =
9769           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9770                       N0.getOperand(1), N0.getOperand(2));
9771       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9772                          VecOnes);
9773     }
9774
9775     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9776     SDLoc DL(N);
9777     if (SDValue SCC = SimplifySelectCC(
9778             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9779             DAG.getConstant(0, DL, VT),
9780             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9781       return SCC;
9782   }
9783
9784   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9785   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9786       isa<ConstantSDNode>(N0.getOperand(1)) &&
9787       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9788       N0.hasOneUse()) {
9789     SDValue ShAmt = N0.getOperand(1);
9790     if (N0.getOpcode() == ISD::SHL) {
9791       SDValue InnerZExt = N0.getOperand(0);
9792       // If the original shl may be shifting out bits, do not perform this
9793       // transformation.
9794       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9795         InnerZExt.getOperand(0).getValueSizeInBits();
9796       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
9797         return SDValue();
9798     }
9799
9800     SDLoc DL(N);
9801
9802     // Ensure that the shift amount is wide enough for the shifted value.
9803     if (VT.getSizeInBits() >= 256)
9804       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9805
9806     return DAG.getNode(N0.getOpcode(), DL, VT,
9807                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9808                        ShAmt);
9809   }
9810
9811   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9812     return NewVSel;
9813
9814   return SDValue();
9815 }
9816
9817 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9818   SDValue N0 = N->getOperand(0);
9819   EVT VT = N->getValueType(0);
9820
9821   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9822     return Res;
9823
9824   // fold (aext (aext x)) -> (aext x)
9825   // fold (aext (zext x)) -> (zext x)
9826   // fold (aext (sext x)) -> (sext x)
9827   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9828       N0.getOpcode() == ISD::ZERO_EXTEND ||
9829       N0.getOpcode() == ISD::SIGN_EXTEND)
9830     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9831
9832   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9833   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9834   if (N0.getOpcode() == ISD::TRUNCATE) {
9835     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9836       SDNode *oye = N0.getOperand(0).getNode();
9837       if (NarrowLoad.getNode() != N0.getNode()) {
9838         CombineTo(N0.getNode(), NarrowLoad);
9839         // CombineTo deleted the truncate, if needed, but not what's under it.
9840         AddToWorklist(oye);
9841       }
9842       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9843     }
9844   }
9845
9846   // fold (aext (truncate x))
9847   if (N0.getOpcode() == ISD::TRUNCATE)
9848     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9849
9850   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9851   // if the trunc is not free.
9852   if (N0.getOpcode() == ISD::AND &&
9853       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9854       N0.getOperand(1).getOpcode() == ISD::Constant &&
9855       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9856                           N0.getValueType())) {
9857     SDLoc DL(N);
9858     SDValue X = N0.getOperand(0).getOperand(0);
9859     X = DAG.getAnyExtOrTrunc(X, DL, VT);
9860     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9861     Mask = Mask.zext(VT.getSizeInBits());
9862     return DAG.getNode(ISD::AND, DL, VT,
9863                        X, DAG.getConstant(Mask, DL, VT));
9864   }
9865
9866   // fold (aext (load x)) -> (aext (truncate (extload x)))
9867   // None of the supported targets knows how to perform load and any_ext
9868   // on vectors in one instruction.  We only perform this transformation on
9869   // scalars.
9870   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9871       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9872       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9873     bool DoXform = true;
9874     SmallVector<SDNode*, 4> SetCCs;
9875     if (!N0.hasOneUse())
9876       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9877                                         TLI);
9878     if (DoXform) {
9879       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9880       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9881                                        LN0->getChain(),
9882                                        LN0->getBasePtr(), N0.getValueType(),
9883                                        LN0->getMemOperand());
9884       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9885       // If the load value is used only by N, replace it via CombineTo N.
9886       bool NoReplaceTrunc = N0.hasOneUse();
9887       CombineTo(N, ExtLoad);
9888       if (NoReplaceTrunc) {
9889         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9890         recursivelyDeleteUnusedNodes(LN0);
9891       } else {
9892         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9893                                     N0.getValueType(), ExtLoad);
9894         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9895       }
9896       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9897     }
9898   }
9899
9900   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9901   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9902   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
9903   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9904       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9905     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9906     ISD::LoadExtType ExtType = LN0->getExtensionType();
9907     EVT MemVT = LN0->getMemoryVT();
9908     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9909       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9910                                        VT, LN0->getChain(), LN0->getBasePtr(),
9911                                        MemVT, LN0->getMemOperand());
9912       CombineTo(N, ExtLoad);
9913       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9914       recursivelyDeleteUnusedNodes(LN0);
9915       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9916     }
9917   }
9918
9919   if (N0.getOpcode() == ISD::SETCC) {
9920     // For vectors:
9921     // aext(setcc) -> vsetcc
9922     // aext(setcc) -> truncate(vsetcc)
9923     // aext(setcc) -> aext(vsetcc)
9924     // Only do this before legalize for now.
9925     if (VT.isVector() && !LegalOperations) {
9926       EVT N00VT = N0.getOperand(0).getValueType();
9927       if (getSetCCResultType(N00VT) == N0.getValueType())
9928         return SDValue();
9929
9930       // We know that the # elements of the results is the same as the
9931       // # elements of the compare (and the # elements of the compare result
9932       // for that matter).  Check to see that they are the same size.  If so,
9933       // we know that the element size of the sext'd result matches the
9934       // element size of the compare operands.
9935       if (VT.getSizeInBits() == N00VT.getSizeInBits())
9936         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9937                              N0.getOperand(1),
9938                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
9939
9940       // If the desired elements are smaller or larger than the source
9941       // elements we can use a matching integer vector type and then
9942       // truncate/any extend
9943       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9944       SDValue VsetCC =
9945         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9946                       N0.getOperand(1),
9947                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
9948       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9949     }
9950
9951     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9952     SDLoc DL(N);
9953     if (SDValue SCC = SimplifySelectCC(
9954             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9955             DAG.getConstant(0, DL, VT),
9956             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9957       return SCC;
9958   }
9959
9960   return SDValue();
9961 }
9962
9963 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9964   unsigned Opcode = N->getOpcode();
9965   SDValue N0 = N->getOperand(0);
9966   SDValue N1 = N->getOperand(1);
9967   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9968
9969   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9970   if (N0.getOpcode() == Opcode &&
9971       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9972     return N0;
9973
9974   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9975       N0.getOperand(0).getOpcode() == Opcode) {
9976     // We have an assert, truncate, assert sandwich. Make one stronger assert
9977     // by asserting on the smallest asserted type to the larger source type.
9978     // This eliminates the later assert:
9979     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9980     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9981     SDValue BigA = N0.getOperand(0);
9982     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9983     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9984            "Asserting zero/sign-extended bits to a type larger than the "
9985            "truncated destination does not provide information");
9986
9987     SDLoc DL(N);
9988     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9989     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9990     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9991                                     BigA.getOperand(0), MinAssertVTVal);
9992     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9993   }
9994
9995   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
9996   // than X. Just move the AssertZext in front of the truncate and drop the
9997   // AssertSExt.
9998   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9999       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10000       Opcode == ISD::AssertZext) {
10001     SDValue BigA = N0.getOperand(0);
10002     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10003     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10004            "Asserting zero/sign-extended bits to a type larger than the "
10005            "truncated destination does not provide information");
10006
10007     if (AssertVT.bitsLT(BigA_AssertVT)) {
10008       SDLoc DL(N);
10009       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10010                                       BigA.getOperand(0), N1);
10011       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10012     }
10013   }
10014
10015   return SDValue();
10016 }
10017
10018 /// If the result of a wider load is shifted to right of N  bits and then
10019 /// truncated to a narrower type and where N is a multiple of number of bits of
10020 /// the narrower type, transform it to a narrower load from address + N / num of
10021 /// bits of new type. Also narrow the load if the result is masked with an AND
10022 /// to effectively produce a smaller type. If the result is to be extended, also
10023 /// fold the extension to form a extending load.
10024 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10025   unsigned Opc = N->getOpcode();
10026
10027   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10028   SDValue N0 = N->getOperand(0);
10029   EVT VT = N->getValueType(0);
10030   EVT ExtVT = VT;
10031
10032   // This transformation isn't valid for vector loads.
10033   if (VT.isVector())
10034     return SDValue();
10035
10036   unsigned ShAmt = 0;
10037   bool HasShiftedOffset = false;
10038   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10039   // extended to VT.
10040   if (Opc == ISD::SIGN_EXTEND_INREG) {
10041     ExtType = ISD::SEXTLOAD;
10042     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10043   } else if (Opc == ISD::SRL) {
10044     // Another special-case: SRL is basically zero-extending a narrower value,
10045     // or it maybe shifting a higher subword, half or byte into the lowest
10046     // bits.
10047     ExtType = ISD::ZEXTLOAD;
10048     N0 = SDValue(N, 0);
10049
10050     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10051     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10052     if (!N01 || !LN0)
10053       return SDValue();
10054
10055     uint64_t ShiftAmt = N01->getZExtValue();
10056     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10057     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10058       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10059     else
10060       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10061                                 VT.getSizeInBits() - ShiftAmt);
10062   } else if (Opc == ISD::AND) {
10063     // An AND with a constant mask is the same as a truncate + zero-extend.
10064     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10065     if (!AndC)
10066       return SDValue();
10067
10068     const APInt &Mask = AndC->getAPIntValue();
10069     unsigned ActiveBits = 0;
10070     if (Mask.isMask()) {
10071       ActiveBits = Mask.countTrailingOnes();
10072     } else if (Mask.isShiftedMask()) {
10073       ShAmt = Mask.countTrailingZeros();
10074       APInt ShiftedMask = Mask.lshr(ShAmt);
10075       ActiveBits = ShiftedMask.countTrailingOnes();
10076       HasShiftedOffset = true;
10077     } else
10078       return SDValue();
10079
10080     ExtType = ISD::ZEXTLOAD;
10081     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10082   }
10083
10084   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10085     SDValue SRL = N0;
10086     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10087       ShAmt = ConstShift->getZExtValue();
10088       unsigned EVTBits = ExtVT.getSizeInBits();
10089       // Is the shift amount a multiple of size of VT?
10090       if ((ShAmt & (EVTBits-1)) == 0) {
10091         N0 = N0.getOperand(0);
10092         // Is the load width a multiple of size of VT?
10093         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10094           return SDValue();
10095       }
10096
10097       // At this point, we must have a load or else we can't do the transform.
10098       if (!isa<LoadSDNode>(N0)) return SDValue();
10099
10100       auto *LN0 = cast<LoadSDNode>(N0);
10101
10102       // Because a SRL must be assumed to *need* to zero-extend the high bits
10103       // (as opposed to anyext the high bits), we can't combine the zextload
10104       // lowering of SRL and an sextload.
10105       if (LN0->getExtensionType() == ISD::SEXTLOAD)
10106         return SDValue();
10107
10108       // If the shift amount is larger than the input type then we're not
10109       // accessing any of the loaded bytes.  If the load was a zextload/extload
10110       // then the result of the shift+trunc is zero/undef (handled elsewhere).
10111       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10112         return SDValue();
10113
10114       // If the SRL is only used by a masking AND, we may be able to adjust
10115       // the ExtVT to make the AND redundant.
10116       SDNode *Mask = *(SRL->use_begin());
10117       if (Mask->getOpcode() == ISD::AND &&
10118           isa<ConstantSDNode>(Mask->getOperand(1))) {
10119         const APInt &ShiftMask =
10120           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10121         if (ShiftMask.isMask()) {
10122           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10123                                            ShiftMask.countTrailingOnes());
10124           // If the mask is smaller, recompute the type.
10125           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10126               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10127             ExtVT = MaskedVT;
10128         }
10129       }
10130     }
10131   }
10132
10133   // If the load is shifted left (and the result isn't shifted back right),
10134   // we can fold the truncate through the shift.
10135   unsigned ShLeftAmt = 0;
10136   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10137       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10138     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10139       ShLeftAmt = N01->getZExtValue();
10140       N0 = N0.getOperand(0);
10141     }
10142   }
10143
10144   // If we haven't found a load, we can't narrow it.
10145   if (!isa<LoadSDNode>(N0))
10146     return SDValue();
10147
10148   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10149   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10150     return SDValue();
10151
10152   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10153     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10154     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10155     return LVTStoreBits - EVTStoreBits - ShAmt;
10156   };
10157
10158   // For big endian targets, we need to adjust the offset to the pointer to
10159   // load the correct bytes.
10160   if (DAG.getDataLayout().isBigEndian())
10161     ShAmt = AdjustBigEndianShift(ShAmt);
10162
10163   EVT PtrType = N0.getOperand(1).getValueType();
10164   uint64_t PtrOff = ShAmt / 8;
10165   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10166   SDLoc DL(LN0);
10167   // The original load itself didn't wrap, so an offset within it doesn't.
10168   SDNodeFlags Flags;
10169   Flags.setNoUnsignedWrap(true);
10170   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10171                                PtrType, LN0->getBasePtr(),
10172                                DAG.getConstant(PtrOff, DL, PtrType),
10173                                Flags);
10174   AddToWorklist(NewPtr.getNode());
10175
10176   SDValue Load;
10177   if (ExtType == ISD::NON_EXTLOAD)
10178     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10179                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10180                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10181   else
10182     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10183                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10184                           NewAlign, LN0->getMemOperand()->getFlags(),
10185                           LN0->getAAInfo());
10186
10187   // Replace the old load's chain with the new load's chain.
10188   WorklistRemover DeadNodes(*this);
10189   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10190
10191   // Shift the result left, if we've swallowed a left shift.
10192   SDValue Result = Load;
10193   if (ShLeftAmt != 0) {
10194     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10195     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10196       ShImmTy = VT;
10197     // If the shift amount is as large as the result size (but, presumably,
10198     // no larger than the source) then the useful bits of the result are
10199     // zero; we can't simply return the shortened shift, because the result
10200     // of that operation is undefined.
10201     if (ShLeftAmt >= VT.getSizeInBits())
10202       Result = DAG.getConstant(0, DL, VT);
10203     else
10204       Result = DAG.getNode(ISD::SHL, DL, VT,
10205                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10206   }
10207
10208   if (HasShiftedOffset) {
10209     // Recalculate the shift amount after it has been altered to calculate
10210     // the offset.
10211     if (DAG.getDataLayout().isBigEndian())
10212       ShAmt = AdjustBigEndianShift(ShAmt);
10213
10214     // We're using a shifted mask, so the load now has an offset. This means
10215     // that data has been loaded into the lower bytes than it would have been
10216     // before, so we need to shl the loaded data into the correct position in the
10217     // register.
10218     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10219     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10220     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10221   }
10222
10223   // Return the new loaded value.
10224   return Result;
10225 }
10226
10227 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10228   SDValue N0 = N->getOperand(0);
10229   SDValue N1 = N->getOperand(1);
10230   EVT VT = N->getValueType(0);
10231   EVT EVT = cast<VTSDNode>(N1)->getVT();
10232   unsigned VTBits = VT.getScalarSizeInBits();
10233   unsigned EVTBits = EVT.getScalarSizeInBits();
10234
10235   if (N0.isUndef())
10236     return DAG.getUNDEF(VT);
10237
10238   // fold (sext_in_reg c1) -> c1
10239   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10240     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10241
10242   // If the input is already sign extended, just drop the extension.
10243   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10244     return N0;
10245
10246   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10247   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10248       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10249     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10250                        N0.getOperand(0), N1);
10251
10252   // fold (sext_in_reg (sext x)) -> (sext x)
10253   // fold (sext_in_reg (aext x)) -> (sext x)
10254   // if x is small enough or if we know that x has more than 1 sign bit and the
10255   // sign_extend_inreg is extending from one of them.
10256   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10257     SDValue N00 = N0.getOperand(0);
10258     unsigned N00Bits = N00.getScalarValueSizeInBits();
10259     if ((N00Bits <= EVTBits ||
10260          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10261         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10262       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10263   }
10264
10265   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10266   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10267        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10268        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10269       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10270     if (!LegalOperations ||
10271         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10272       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10273                          N0.getOperand(0));
10274   }
10275
10276   // fold (sext_in_reg (zext x)) -> (sext x)
10277   // iff we are extending the source sign bit.
10278   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10279     SDValue N00 = N0.getOperand(0);
10280     if (N00.getScalarValueSizeInBits() == EVTBits &&
10281         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10282       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10283   }
10284
10285   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10286   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10287     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10288
10289   // fold operands of sext_in_reg based on knowledge that the top bits are not
10290   // demanded.
10291   if (SimplifyDemandedBits(SDValue(N, 0)))
10292     return SDValue(N, 0);
10293
10294   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10295   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10296   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10297     return NarrowLoad;
10298
10299   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10300   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10301   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10302   if (N0.getOpcode() == ISD::SRL) {
10303     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10304       if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10305         // We can turn this into an SRA iff the input to the SRL is already sign
10306         // extended enough.
10307         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10308         if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10309           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10310                              N0.getOperand(1));
10311       }
10312   }
10313
10314   // fold (sext_inreg (extload x)) -> (sextload x)
10315   // If sextload is not supported by target, we can only do the combine when
10316   // load has one use. Doing otherwise can block folding the extload with other
10317   // extends that the target does support.
10318   if (ISD::isEXTLoad(N0.getNode()) &&
10319       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10320       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10321       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
10322         N0.hasOneUse()) ||
10323        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10324     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10325     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10326                                      LN0->getChain(),
10327                                      LN0->getBasePtr(), EVT,
10328                                      LN0->getMemOperand());
10329     CombineTo(N, ExtLoad);
10330     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10331     AddToWorklist(ExtLoad.getNode());
10332     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10333   }
10334   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10335   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10336       N0.hasOneUse() &&
10337       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10338       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
10339        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10340     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10341     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10342                                      LN0->getChain(),
10343                                      LN0->getBasePtr(), EVT,
10344                                      LN0->getMemOperand());
10345     CombineTo(N, ExtLoad);
10346     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10347     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10348   }
10349
10350   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10351   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10352     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10353                                            N0.getOperand(1), false))
10354       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10355                          BSwap, N1);
10356   }
10357
10358   return SDValue();
10359 }
10360
10361 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10362   SDValue N0 = N->getOperand(0);
10363   EVT VT = N->getValueType(0);
10364
10365   if (N0.isUndef())
10366     return DAG.getUNDEF(VT);
10367
10368   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10369     return Res;
10370
10371   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10372     return SDValue(N, 0);
10373
10374   return SDValue();
10375 }
10376
10377 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10378   SDValue N0 = N->getOperand(0);
10379   EVT VT = N->getValueType(0);
10380
10381   if (N0.isUndef())
10382     return DAG.getUNDEF(VT);
10383
10384   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10385     return Res;
10386
10387   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10388     return SDValue(N, 0);
10389
10390   return SDValue();
10391 }
10392
10393 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10394   SDValue N0 = N->getOperand(0);
10395   EVT VT = N->getValueType(0);
10396   EVT SrcVT = N0.getValueType();
10397   bool isLE = DAG.getDataLayout().isLittleEndian();
10398
10399   // noop truncate
10400   if (SrcVT == VT)
10401     return N0;
10402
10403   // fold (truncate (truncate x)) -> (truncate x)
10404   if (N0.getOpcode() == ISD::TRUNCATE)
10405     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10406
10407   // fold (truncate c1) -> c1
10408   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10409     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10410     if (C.getNode() != N)
10411       return C;
10412   }
10413
10414   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10415   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10416       N0.getOpcode() == ISD::SIGN_EXTEND ||
10417       N0.getOpcode() == ISD::ANY_EXTEND) {
10418     // if the source is smaller than the dest, we still need an extend.
10419     if (N0.getOperand(0).getValueType().bitsLT(VT))
10420       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10421     // if the source is larger than the dest, than we just need the truncate.
10422     if (N0.getOperand(0).getValueType().bitsGT(VT))
10423       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10424     // if the source and dest are the same type, we can drop both the extend
10425     // and the truncate.
10426     return N0.getOperand(0);
10427   }
10428
10429   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10430   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10431     return SDValue();
10432
10433   // Fold extract-and-trunc into a narrow extract. For example:
10434   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10435   //   i32 y = TRUNCATE(i64 x)
10436   //        -- becomes --
10437   //   v16i8 b = BITCAST (v2i64 val)
10438   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10439   //
10440   // Note: We only run this optimization after type legalization (which often
10441   // creates this pattern) and before operation legalization after which
10442   // we need to be more careful about the vector instructions that we generate.
10443   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10444       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10445     EVT VecTy = N0.getOperand(0).getValueType();
10446     EVT ExTy = N0.getValueType();
10447     EVT TrTy = N->getValueType(0);
10448
10449     unsigned NumElem = VecTy.getVectorNumElements();
10450     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10451
10452     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10453     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10454
10455     SDValue EltNo = N0->getOperand(1);
10456     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10457       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10458       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10459       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10460
10461       SDLoc DL(N);
10462       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10463                          DAG.getBitcast(NVT, N0.getOperand(0)),
10464                          DAG.getConstant(Index, DL, IndexTy));
10465     }
10466   }
10467
10468   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10469   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10470     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10471         TLI.isTruncateFree(SrcVT, VT)) {
10472       SDLoc SL(N0);
10473       SDValue Cond = N0.getOperand(0);
10474       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10475       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10476       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10477     }
10478   }
10479
10480   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
10481   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10482       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10483       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10484     SDValue Amt = N0.getOperand(1);
10485     KnownBits Known = DAG.computeKnownBits(Amt);
10486     unsigned Size = VT.getScalarSizeInBits();
10487     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10488       SDLoc SL(N);
10489       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10490
10491       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10492       if (AmtVT != Amt.getValueType()) {
10493         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10494         AddToWorklist(Amt.getNode());
10495       }
10496       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10497     }
10498   }
10499
10500   // Attempt to pre-truncate BUILD_VECTOR sources.
10501   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10502       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10503     SDLoc DL(N);
10504     EVT SVT = VT.getScalarType();
10505     SmallVector<SDValue, 8> TruncOps;
10506     for (const SDValue &Op : N0->op_values()) {
10507       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10508       TruncOps.push_back(TruncOp);
10509     }
10510     return DAG.getBuildVector(VT, DL, TruncOps);
10511   }
10512
10513   // Fold a series of buildvector, bitcast, and truncate if possible.
10514   // For example fold
10515   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10516   //   (2xi32 (buildvector x, y)).
10517   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10518       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10519       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10520       N0.getOperand(0).hasOneUse()) {
10521     SDValue BuildVect = N0.getOperand(0);
10522     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10523     EVT TruncVecEltTy = VT.getVectorElementType();
10524
10525     // Check that the element types match.
10526     if (BuildVectEltTy == TruncVecEltTy) {
10527       // Now we only need to compute the offset of the truncated elements.
10528       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
10529       unsigned TruncVecNumElts = VT.getVectorNumElements();
10530       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10531
10532       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10533              "Invalid number of elements");
10534
10535       SmallVector<SDValue, 8> Opnds;
10536       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10537         Opnds.push_back(BuildVect.getOperand(i));
10538
10539       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10540     }
10541   }
10542
10543   // See if we can simplify the input to this truncate through knowledge that
10544   // only the low bits are being used.
10545   // For example "trunc (or (shl x, 8), y)" // -> trunc y
10546   // Currently we only perform this optimization on scalars because vectors
10547   // may have different active low bits.
10548   if (!VT.isVector()) {
10549     APInt Mask =
10550         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10551     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10552       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10553   }
10554
10555   // fold (truncate (load x)) -> (smaller load x)
10556   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10557   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10558     if (SDValue Reduced = ReduceLoadWidth(N))
10559       return Reduced;
10560
10561     // Handle the case where the load remains an extending load even
10562     // after truncation.
10563     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10564       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10565       if (LN0->isSimple() &&
10566           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10567         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10568                                          VT, LN0->getChain(), LN0->getBasePtr(),
10569                                          LN0->getMemoryVT(),
10570                                          LN0->getMemOperand());
10571         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10572         return NewLoad;
10573       }
10574     }
10575   }
10576
10577   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10578   // where ... are all 'undef'.
10579   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10580     SmallVector<EVT, 8> VTs;
10581     SDValue V;
10582     unsigned Idx = 0;
10583     unsigned NumDefs = 0;
10584
10585     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10586       SDValue X = N0.getOperand(i);
10587       if (!X.isUndef()) {
10588         V = X;
10589         Idx = i;
10590         NumDefs++;
10591       }
10592       // Stop if more than one members are non-undef.
10593       if (NumDefs > 1)
10594         break;
10595       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10596                                      VT.getVectorElementType(),
10597                                      X.getValueType().getVectorNumElements()));
10598     }
10599
10600     if (NumDefs == 0)
10601       return DAG.getUNDEF(VT);
10602
10603     if (NumDefs == 1) {
10604       assert(V.getNode() && "The single defined operand is empty!");
10605       SmallVector<SDValue, 8> Opnds;
10606       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10607         if (i != Idx) {
10608           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10609           continue;
10610         }
10611         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10612         AddToWorklist(NV.getNode());
10613         Opnds.push_back(NV);
10614       }
10615       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10616     }
10617   }
10618
10619   // Fold truncate of a bitcast of a vector to an extract of the low vector
10620   // element.
10621   //
10622   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10623   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10624     SDValue VecSrc = N0.getOperand(0);
10625     EVT SrcVT = VecSrc.getValueType();
10626     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10627         (!LegalOperations ||
10628          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10629       SDLoc SL(N);
10630
10631       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10632       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10633       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10634                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10635     }
10636   }
10637
10638   // Simplify the operands using demanded-bits information.
10639   if (!VT.isVector() &&
10640       SimplifyDemandedBits(SDValue(N, 0)))
10641     return SDValue(N, 0);
10642
10643   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10644   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10645   // When the adde's carry is not used.
10646   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10647       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10648       // We only do for addcarry before legalize operation
10649       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10650        TLI.isOperationLegal(N0.getOpcode(), VT))) {
10651     SDLoc SL(N);
10652     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10653     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10654     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10655     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10656   }
10657
10658   // fold (truncate (extract_subvector(ext x))) ->
10659   //      (extract_subvector x)
10660   // TODO: This can be generalized to cover cases where the truncate and extract
10661   // do not fully cancel each other out.
10662   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10663     SDValue N00 = N0.getOperand(0);
10664     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10665         N00.getOpcode() == ISD::ZERO_EXTEND ||
10666         N00.getOpcode() == ISD::ANY_EXTEND) {
10667       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10668           VT.getVectorElementType())
10669         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10670                            N00.getOperand(0), N0.getOperand(1));
10671     }
10672   }
10673
10674   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10675     return NewVSel;
10676
10677   // Narrow a suitable binary operation with a non-opaque constant operand by
10678   // moving it ahead of the truncate. This is limited to pre-legalization
10679   // because targets may prefer a wider type during later combines and invert
10680   // this transform.
10681   switch (N0.getOpcode()) {
10682   case ISD::ADD:
10683   case ISD::SUB:
10684   case ISD::MUL:
10685   case ISD::AND:
10686   case ISD::OR:
10687   case ISD::XOR:
10688     if (!LegalOperations && N0.hasOneUse() &&
10689         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10690          isConstantOrConstantVector(N0.getOperand(1), true))) {
10691       // TODO: We already restricted this to pre-legalization, but for vectors
10692       // we are extra cautious to not create an unsupported operation.
10693       // Target-specific changes are likely needed to avoid regressions here.
10694       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10695         SDLoc DL(N);
10696         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10697         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10698         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10699       }
10700     }
10701   }
10702
10703   return SDValue();
10704 }
10705
10706 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10707   SDValue Elt = N->getOperand(i);
10708   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10709     return Elt.getNode();
10710   return Elt.getOperand(Elt.getResNo()).getNode();
10711 }
10712
10713 /// build_pair (load, load) -> load
10714 /// if load locations are consecutive.
10715 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10716   assert(N->getOpcode() == ISD::BUILD_PAIR);
10717
10718   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10719   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10720
10721   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10722   // most significant part in elt 1. So when combining into one large load, we
10723   // need to consider the endianness.
10724   if (DAG.getDataLayout().isBigEndian())
10725     std::swap(LD1, LD2);
10726
10727   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10728       LD1->getAddressSpace() != LD2->getAddressSpace())
10729     return SDValue();
10730   EVT LD1VT = LD1->getValueType(0);
10731   unsigned LD1Bytes = LD1VT.getStoreSize();
10732   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10733       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10734     unsigned Align = LD1->getAlignment();
10735     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10736         VT.getTypeForEVT(*DAG.getContext()));
10737
10738     if (NewAlign <= Align &&
10739         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10740       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10741                          LD1->getPointerInfo(), Align);
10742   }
10743
10744   return SDValue();
10745 }
10746
10747 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10748   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10749   // and Lo parts; on big-endian machines it doesn't.
10750   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10751 }
10752
10753 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10754                                     const TargetLowering &TLI) {
10755   // If this is not a bitcast to an FP type or if the target doesn't have
10756   // IEEE754-compliant FP logic, we're done.
10757   EVT VT = N->getValueType(0);
10758   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10759     return SDValue();
10760
10761   // TODO: Handle cases where the integer constant is a different scalar
10762   // bitwidth to the FP.
10763   SDValue N0 = N->getOperand(0);
10764   EVT SourceVT = N0.getValueType();
10765   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10766     return SDValue();
10767
10768   unsigned FPOpcode;
10769   APInt SignMask;
10770   switch (N0.getOpcode()) {
10771   case ISD::AND:
10772     FPOpcode = ISD::FABS;
10773     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10774     break;
10775   case ISD::XOR:
10776     FPOpcode = ISD::FNEG;
10777     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10778     break;
10779   case ISD::OR:
10780     FPOpcode = ISD::FABS;
10781     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10782     break;
10783   default:
10784     return SDValue();
10785   }
10786
10787   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10788   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10789   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10790   //   fneg (fabs X)
10791   SDValue LogicOp0 = N0.getOperand(0);
10792   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10793   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10794       LogicOp0.getOpcode() == ISD::BITCAST &&
10795       LogicOp0.getOperand(0).getValueType() == VT) {
10796     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10797     NumFPLogicOpsConv++;
10798     if (N0.getOpcode() == ISD::OR)
10799       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10800     return FPOp;
10801   }
10802
10803   return SDValue();
10804 }
10805
10806 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10807   SDValue N0 = N->getOperand(0);
10808   EVT VT = N->getValueType(0);
10809
10810   if (N0.isUndef())
10811     return DAG.getUNDEF(VT);
10812
10813   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
10814   // Only do this before legalize types, unless both types are integer and the
10815   // scalar type is legal. Only do this before legalize ops, since the target
10816   // maybe depending on the bitcast.
10817   // First check to see if this is all constant.
10818   // TODO: Support FP bitcasts after legalize types.
10819   if (VT.isVector() &&
10820       (!LegalTypes ||
10821        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
10822         TLI.isTypeLegal(VT.getVectorElementType()))) &&
10823       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10824       cast<BuildVectorSDNode>(N0)->isConstant())
10825     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10826                                              VT.getVectorElementType());
10827
10828   // If the input is a constant, let getNode fold it.
10829   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
10830     // If we can't allow illegal operations, we need to check that this is just
10831     // a fp -> int or int -> conversion and that the resulting operation will
10832     // be legal.
10833     if (!LegalOperations ||
10834         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10835          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10836         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10837          TLI.isOperationLegal(ISD::Constant, VT))) {
10838       SDValue C = DAG.getBitcast(VT, N0);
10839       if (C.getNode() != N)
10840         return C;
10841     }
10842   }
10843
10844   // (conv (conv x, t1), t2) -> (conv x, t2)
10845   if (N0.getOpcode() == ISD::BITCAST)
10846     return DAG.getBitcast(VT, N0.getOperand(0));
10847
10848   // fold (conv (load x)) -> (load (conv*)x)
10849   // If the resultant load doesn't need a higher alignment than the original!
10850   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10851       // Do not remove the cast if the types differ in endian layout.
10852       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10853           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10854       // If the load is volatile, we only want to change the load type if the
10855       // resulting load is legal. Otherwise we might increase the number of
10856       // memory accesses. We don't care if the original type was legal or not
10857       // as we assume software couldn't rely on the number of accesses of an
10858       // illegal type.
10859       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
10860        TLI.isOperationLegal(ISD::LOAD, VT))) {
10861     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10862
10863     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
10864                                     *LN0->getMemOperand())) {
10865       SDValue Load =
10866           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10867                       LN0->getPointerInfo(), LN0->getAlignment(),
10868                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10869       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10870       return Load;
10871     }
10872   }
10873
10874   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10875     return V;
10876
10877   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10878   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10879   //
10880   // For ppc_fp128:
10881   // fold (bitcast (fneg x)) ->
10882   //     flipbit = signbit
10883   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10884   //
10885   // fold (bitcast (fabs x)) ->
10886   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
10887   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10888   // This often reduces constant pool loads.
10889   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10890        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10891       N0.getNode()->hasOneUse() && VT.isInteger() &&
10892       !VT.isVector() && !N0.getValueType().isVector()) {
10893     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10894     AddToWorklist(NewConv.getNode());
10895
10896     SDLoc DL(N);
10897     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10898       assert(VT.getSizeInBits() == 128);
10899       SDValue SignBit = DAG.getConstant(
10900           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10901       SDValue FlipBit;
10902       if (N0.getOpcode() == ISD::FNEG) {
10903         FlipBit = SignBit;
10904         AddToWorklist(FlipBit.getNode());
10905       } else {
10906         assert(N0.getOpcode() == ISD::FABS);
10907         SDValue Hi =
10908             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10909                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10910                                               SDLoc(NewConv)));
10911         AddToWorklist(Hi.getNode());
10912         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10913         AddToWorklist(FlipBit.getNode());
10914       }
10915       SDValue FlipBits =
10916           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10917       AddToWorklist(FlipBits.getNode());
10918       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10919     }
10920     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10921     if (N0.getOpcode() == ISD::FNEG)
10922       return DAG.getNode(ISD::XOR, DL, VT,
10923                          NewConv, DAG.getConstant(SignBit, DL, VT));
10924     assert(N0.getOpcode() == ISD::FABS);
10925     return DAG.getNode(ISD::AND, DL, VT,
10926                        NewConv, DAG.getConstant(~SignBit, DL, VT));
10927   }
10928
10929   // fold (bitconvert (fcopysign cst, x)) ->
10930   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
10931   // Note that we don't handle (copysign x, cst) because this can always be
10932   // folded to an fneg or fabs.
10933   //
10934   // For ppc_fp128:
10935   // fold (bitcast (fcopysign cst, x)) ->
10936   //     flipbit = (and (extract_element
10937   //                     (xor (bitcast cst), (bitcast x)), 0),
10938   //                    signbit)
10939   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
10940   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10941       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10942       VT.isInteger() && !VT.isVector()) {
10943     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10944     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10945     if (isTypeLegal(IntXVT)) {
10946       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10947       AddToWorklist(X.getNode());
10948
10949       // If X has a different width than the result/lhs, sext it or truncate it.
10950       unsigned VTWidth = VT.getSizeInBits();
10951       if (OrigXWidth < VTWidth) {
10952         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10953         AddToWorklist(X.getNode());
10954       } else if (OrigXWidth > VTWidth) {
10955         // To get the sign bit in the right place, we have to shift it right
10956         // before truncating.
10957         SDLoc DL(X);
10958         X = DAG.getNode(ISD::SRL, DL,
10959                         X.getValueType(), X,
10960                         DAG.getConstant(OrigXWidth-VTWidth, DL,
10961                                         X.getValueType()));
10962         AddToWorklist(X.getNode());
10963         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10964         AddToWorklist(X.getNode());
10965       }
10966
10967       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10968         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10969         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10970         AddToWorklist(Cst.getNode());
10971         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10972         AddToWorklist(X.getNode());
10973         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10974         AddToWorklist(XorResult.getNode());
10975         SDValue XorResult64 = DAG.getNode(
10976             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10977             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10978                                   SDLoc(XorResult)));
10979         AddToWorklist(XorResult64.getNode());
10980         SDValue FlipBit =
10981             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10982                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10983         AddToWorklist(FlipBit.getNode());
10984         SDValue FlipBits =
10985             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10986         AddToWorklist(FlipBits.getNode());
10987         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10988       }
10989       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10990       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10991                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
10992       AddToWorklist(X.getNode());
10993
10994       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10995       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10996                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10997       AddToWorklist(Cst.getNode());
10998
10999       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11000     }
11001   }
11002
11003   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11004   if (N0.getOpcode() == ISD::BUILD_PAIR)
11005     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11006       return CombineLD;
11007
11008   // Remove double bitcasts from shuffles - this is often a legacy of
11009   // XformToShuffleWithZero being used to combine bitmaskings (of
11010   // float vectors bitcast to integer vectors) into shuffles.
11011   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11012   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11013       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11014       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11015       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11016     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11017
11018     // If operands are a bitcast, peek through if it casts the original VT.
11019     // If operands are a constant, just bitcast back to original VT.
11020     auto PeekThroughBitcast = [&](SDValue Op) {
11021       if (Op.getOpcode() == ISD::BITCAST &&
11022           Op.getOperand(0).getValueType() == VT)
11023         return SDValue(Op.getOperand(0));
11024       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11025           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11026         return DAG.getBitcast(VT, Op);
11027       return SDValue();
11028     };
11029
11030     // FIXME: If either input vector is bitcast, try to convert the shuffle to
11031     // the result type of this bitcast. This would eliminate at least one
11032     // bitcast. See the transform in InstCombine.
11033     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11034     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11035     if (!(SV0 && SV1))
11036       return SDValue();
11037
11038     int MaskScale =
11039         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11040     SmallVector<int, 8> NewMask;
11041     for (int M : SVN->getMask())
11042       for (int i = 0; i != MaskScale; ++i)
11043         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11044
11045     SDValue LegalShuffle =
11046         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11047     if (LegalShuffle)
11048       return LegalShuffle;
11049   }
11050
11051   return SDValue();
11052 }
11053
11054 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11055   EVT VT = N->getValueType(0);
11056   return CombineConsecutiveLoads(N, VT);
11057 }
11058
11059 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11060 /// operands. DstEltVT indicates the destination element value type.
11061 SDValue DAGCombiner::
11062 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11063   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11064
11065   // If this is already the right type, we're done.
11066   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11067
11068   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11069   unsigned DstBitSize = DstEltVT.getSizeInBits();
11070
11071   // If this is a conversion of N elements of one type to N elements of another
11072   // type, convert each element.  This handles FP<->INT cases.
11073   if (SrcBitSize == DstBitSize) {
11074     SmallVector<SDValue, 8> Ops;
11075     for (SDValue Op : BV->op_values()) {
11076       // If the vector element type is not legal, the BUILD_VECTOR operands
11077       // are promoted and implicitly truncated.  Make that explicit here.
11078       if (Op.getValueType() != SrcEltVT)
11079         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11080       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11081       AddToWorklist(Ops.back().getNode());
11082     }
11083     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11084                               BV->getValueType(0).getVectorNumElements());
11085     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11086   }
11087
11088   // Otherwise, we're growing or shrinking the elements.  To avoid having to
11089   // handle annoying details of growing/shrinking FP values, we convert them to
11090   // int first.
11091   if (SrcEltVT.isFloatingPoint()) {
11092     // Convert the input float vector to a int vector where the elements are the
11093     // same sizes.
11094     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11095     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11096     SrcEltVT = IntVT;
11097   }
11098
11099   // Now we know the input is an integer vector.  If the output is a FP type,
11100   // convert to integer first, then to FP of the right size.
11101   if (DstEltVT.isFloatingPoint()) {
11102     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11103     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11104
11105     // Next, convert to FP elements of the same size.
11106     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11107   }
11108
11109   SDLoc DL(BV);
11110
11111   // Okay, we know the src/dst types are both integers of differing types.
11112   // Handling growing first.
11113   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11114   if (SrcBitSize < DstBitSize) {
11115     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11116
11117     SmallVector<SDValue, 8> Ops;
11118     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11119          i += NumInputsPerOutput) {
11120       bool isLE = DAG.getDataLayout().isLittleEndian();
11121       APInt NewBits = APInt(DstBitSize, 0);
11122       bool EltIsUndef = true;
11123       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11124         // Shift the previously computed bits over.
11125         NewBits <<= SrcBitSize;
11126         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11127         if (Op.isUndef()) continue;
11128         EltIsUndef = false;
11129
11130         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11131                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
11132       }
11133
11134       if (EltIsUndef)
11135         Ops.push_back(DAG.getUNDEF(DstEltVT));
11136       else
11137         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11138     }
11139
11140     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11141     return DAG.getBuildVector(VT, DL, Ops);
11142   }
11143
11144   // Finally, this must be the case where we are shrinking elements: each input
11145   // turns into multiple outputs.
11146   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11147   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11148                             NumOutputsPerInput*BV->getNumOperands());
11149   SmallVector<SDValue, 8> Ops;
11150
11151   for (const SDValue &Op : BV->op_values()) {
11152     if (Op.isUndef()) {
11153       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11154       continue;
11155     }
11156
11157     APInt OpVal = cast<ConstantSDNode>(Op)->
11158                   getAPIntValue().zextOrTrunc(SrcBitSize);
11159
11160     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11161       APInt ThisVal = OpVal.trunc(DstBitSize);
11162       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11163       OpVal.lshrInPlace(DstBitSize);
11164     }
11165
11166     // For big endian targets, swap the order of the pieces of each element.
11167     if (DAG.getDataLayout().isBigEndian())
11168       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11169   }
11170
11171   return DAG.getBuildVector(VT, DL, Ops);
11172 }
11173
11174 static bool isContractable(SDNode *N) {
11175   SDNodeFlags F = N->getFlags();
11176   return F.hasAllowContract() || F.hasAllowReassociation();
11177 }
11178
11179 /// Try to perform FMA combining on a given FADD node.
11180 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11181   SDValue N0 = N->getOperand(0);
11182   SDValue N1 = N->getOperand(1);
11183   EVT VT = N->getValueType(0);
11184   SDLoc SL(N);
11185
11186   const TargetOptions &Options = DAG.getTarget().Options;
11187
11188   // Floating-point multiply-add with intermediate rounding.
11189   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11190
11191   // Floating-point multiply-add without intermediate rounding.
11192   bool HasFMA =
11193       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11194       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11195
11196   // No valid opcode, do not combine.
11197   if (!HasFMAD && !HasFMA)
11198     return SDValue();
11199
11200   SDNodeFlags Flags = N->getFlags();
11201   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11202   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11203                               CanFuse || HasFMAD);
11204   // If the addition is not contractable, do not combine.
11205   if (!AllowFusionGlobally && !isContractable(N))
11206     return SDValue();
11207
11208   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11209   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11210     return SDValue();
11211
11212   // Always prefer FMAD to FMA for precision.
11213   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11214   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11215
11216   // Is the node an FMUL and contractable either due to global flags or
11217   // SDNodeFlags.
11218   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11219     if (N.getOpcode() != ISD::FMUL)
11220       return false;
11221     return AllowFusionGlobally || isContractable(N.getNode());
11222   };
11223   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11224   // prefer to fold the multiply with fewer uses.
11225   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
11226     if (N0.getNode()->use_size() > N1.getNode()->use_size())
11227       std::swap(N0, N1);
11228   }
11229
11230   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
11231   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11232     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11233                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
11234   }
11235
11236   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11237   // Note: Commutes FADD operands.
11238   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11239     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11240                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
11241   }
11242
11243   // Look through FP_EXTEND nodes to do more combining.
11244
11245   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
11246   if (N0.getOpcode() == ISD::FP_EXTEND) {
11247     SDValue N00 = N0.getOperand(0);
11248     if (isContractableFMUL(N00) &&
11249         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11250       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11251                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11252                                      N00.getOperand(0)),
11253                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11254                                      N00.getOperand(1)), N1, Flags);
11255     }
11256   }
11257
11258   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11259   // Note: Commutes FADD operands.
11260   if (N1.getOpcode() == ISD::FP_EXTEND) {
11261     SDValue N10 = N1.getOperand(0);
11262     if (isContractableFMUL(N10) &&
11263         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11264       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11265                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11266                                      N10.getOperand(0)),
11267                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11268                                      N10.getOperand(1)), N0, Flags);
11269     }
11270   }
11271
11272   // More folding opportunities when target permits.
11273   if (Aggressive) {
11274     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
11275     if (CanFuse &&
11276         N0.getOpcode() == PreferredFusedOpcode &&
11277         N0.getOperand(2).getOpcode() == ISD::FMUL &&
11278         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11279       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11280                          N0.getOperand(0), N0.getOperand(1),
11281                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11282                                      N0.getOperand(2).getOperand(0),
11283                                      N0.getOperand(2).getOperand(1),
11284                                      N1, Flags), Flags);
11285     }
11286
11287     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
11288     if (CanFuse &&
11289         N1->getOpcode() == PreferredFusedOpcode &&
11290         N1.getOperand(2).getOpcode() == ISD::FMUL &&
11291         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11292       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11293                          N1.getOperand(0), N1.getOperand(1),
11294                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11295                                      N1.getOperand(2).getOperand(0),
11296                                      N1.getOperand(2).getOperand(1),
11297                                      N0, Flags), Flags);
11298     }
11299
11300
11301     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11302     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
11303     auto FoldFAddFMAFPExtFMul = [&] (
11304       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11305       SDNodeFlags Flags) {
11306       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11307                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11308                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11309                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11310                                      Z, Flags), Flags);
11311     };
11312     if (N0.getOpcode() == PreferredFusedOpcode) {
11313       SDValue N02 = N0.getOperand(2);
11314       if (N02.getOpcode() == ISD::FP_EXTEND) {
11315         SDValue N020 = N02.getOperand(0);
11316         if (isContractableFMUL(N020) &&
11317             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11318           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11319                                       N020.getOperand(0), N020.getOperand(1),
11320                                       N1, Flags);
11321         }
11322       }
11323     }
11324
11325     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11326     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11327     // FIXME: This turns two single-precision and one double-precision
11328     // operation into two double-precision operations, which might not be
11329     // interesting for all targets, especially GPUs.
11330     auto FoldFAddFPExtFMAFMul = [&] (
11331       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11332       SDNodeFlags Flags) {
11333       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11334                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11335                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11336                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11337                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11338                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11339                                      Z, Flags), Flags);
11340     };
11341     if (N0.getOpcode() == ISD::FP_EXTEND) {
11342       SDValue N00 = N0.getOperand(0);
11343       if (N00.getOpcode() == PreferredFusedOpcode) {
11344         SDValue N002 = N00.getOperand(2);
11345         if (isContractableFMUL(N002) &&
11346             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11347           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11348                                       N002.getOperand(0), N002.getOperand(1),
11349                                       N1, Flags);
11350         }
11351       }
11352     }
11353
11354     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
11355     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
11356     if (N1.getOpcode() == PreferredFusedOpcode) {
11357       SDValue N12 = N1.getOperand(2);
11358       if (N12.getOpcode() == ISD::FP_EXTEND) {
11359         SDValue N120 = N12.getOperand(0);
11360         if (isContractableFMUL(N120) &&
11361             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11362           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11363                                       N120.getOperand(0), N120.getOperand(1),
11364                                       N0, Flags);
11365         }
11366       }
11367     }
11368
11369     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
11370     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11371     // FIXME: This turns two single-precision and one double-precision
11372     // operation into two double-precision operations, which might not be
11373     // interesting for all targets, especially GPUs.
11374     if (N1.getOpcode() == ISD::FP_EXTEND) {
11375       SDValue N10 = N1.getOperand(0);
11376       if (N10.getOpcode() == PreferredFusedOpcode) {
11377         SDValue N102 = N10.getOperand(2);
11378         if (isContractableFMUL(N102) &&
11379             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11380           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11381                                       N102.getOperand(0), N102.getOperand(1),
11382                                       N0, Flags);
11383         }
11384       }
11385     }
11386   }
11387
11388   return SDValue();
11389 }
11390
11391 /// Try to perform FMA combining on a given FSUB node.
11392 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11393   SDValue N0 = N->getOperand(0);
11394   SDValue N1 = N->getOperand(1);
11395   EVT VT = N->getValueType(0);
11396   SDLoc SL(N);
11397
11398   const TargetOptions &Options = DAG.getTarget().Options;
11399   // Floating-point multiply-add with intermediate rounding.
11400   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11401
11402   // Floating-point multiply-add without intermediate rounding.
11403   bool HasFMA =
11404       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11405       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11406
11407   // No valid opcode, do not combine.
11408   if (!HasFMAD && !HasFMA)
11409     return SDValue();
11410
11411   const SDNodeFlags Flags = N->getFlags();
11412   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11413   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11414                               CanFuse || HasFMAD);
11415
11416   // If the subtraction is not contractable, do not combine.
11417   if (!AllowFusionGlobally && !isContractable(N))
11418     return SDValue();
11419
11420   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11421   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11422     return SDValue();
11423
11424   // Always prefer FMAD to FMA for precision.
11425   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11426   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11427
11428   // Is the node an FMUL and contractable either due to global flags or
11429   // SDNodeFlags.
11430   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11431     if (N.getOpcode() != ISD::FMUL)
11432       return false;
11433     return AllowFusionGlobally || isContractable(N.getNode());
11434   };
11435
11436   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11437   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11438     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11439                        N0.getOperand(0), N0.getOperand(1),
11440                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11441   }
11442
11443   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11444   // Note: Commutes FSUB operands.
11445   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11446     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11447                        DAG.getNode(ISD::FNEG, SL, VT,
11448                                    N1.getOperand(0)),
11449                        N1.getOperand(1), N0, Flags);
11450   }
11451
11452   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
11453   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11454       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11455     SDValue N00 = N0.getOperand(0).getOperand(0);
11456     SDValue N01 = N0.getOperand(0).getOperand(1);
11457     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11458                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11459                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11460   }
11461
11462   // Look through FP_EXTEND nodes to do more combining.
11463
11464   // fold (fsub (fpext (fmul x, y)), z)
11465   //   -> (fma (fpext x), (fpext y), (fneg z))
11466   if (N0.getOpcode() == ISD::FP_EXTEND) {
11467     SDValue N00 = N0.getOperand(0);
11468     if (isContractableFMUL(N00) &&
11469         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11470       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11471                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11472                                      N00.getOperand(0)),
11473                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11474                                      N00.getOperand(1)),
11475                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11476     }
11477   }
11478
11479   // fold (fsub x, (fpext (fmul y, z)))
11480   //   -> (fma (fneg (fpext y)), (fpext z), x)
11481   // Note: Commutes FSUB operands.
11482   if (N1.getOpcode() == ISD::FP_EXTEND) {
11483     SDValue N10 = N1.getOperand(0);
11484     if (isContractableFMUL(N10) &&
11485         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11486       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11487                          DAG.getNode(ISD::FNEG, SL, VT,
11488                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
11489                                                  N10.getOperand(0))),
11490                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11491                                      N10.getOperand(1)),
11492                          N0, Flags);
11493     }
11494   }
11495
11496   // fold (fsub (fpext (fneg (fmul, x, y))), z)
11497   //   -> (fneg (fma (fpext x), (fpext y), z))
11498   // Note: This could be removed with appropriate canonicalization of the
11499   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11500   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11501   // from implementing the canonicalization in visitFSUB.
11502   if (N0.getOpcode() == ISD::FP_EXTEND) {
11503     SDValue N00 = N0.getOperand(0);
11504     if (N00.getOpcode() == ISD::FNEG) {
11505       SDValue N000 = N00.getOperand(0);
11506       if (isContractableFMUL(N000) &&
11507           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11508         return DAG.getNode(ISD::FNEG, SL, VT,
11509                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11510                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11511                                                    N000.getOperand(0)),
11512                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11513                                                    N000.getOperand(1)),
11514                                        N1, Flags));
11515       }
11516     }
11517   }
11518
11519   // fold (fsub (fneg (fpext (fmul, x, y))), z)
11520   //   -> (fneg (fma (fpext x)), (fpext y), z)
11521   // Note: This could be removed with appropriate canonicalization of the
11522   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11523   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11524   // from implementing the canonicalization in visitFSUB.
11525   if (N0.getOpcode() == ISD::FNEG) {
11526     SDValue N00 = N0.getOperand(0);
11527     if (N00.getOpcode() == ISD::FP_EXTEND) {
11528       SDValue N000 = N00.getOperand(0);
11529       if (isContractableFMUL(N000) &&
11530           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11531         return DAG.getNode(ISD::FNEG, SL, VT,
11532                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11533                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11534                                                    N000.getOperand(0)),
11535                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11536                                                    N000.getOperand(1)),
11537                                        N1, Flags));
11538       }
11539     }
11540   }
11541
11542   // More folding opportunities when target permits.
11543   if (Aggressive) {
11544     // fold (fsub (fma x, y, (fmul u, v)), z)
11545     //   -> (fma x, y (fma u, v, (fneg z)))
11546     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11547         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11548         N0.getOperand(2)->hasOneUse()) {
11549       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11550                          N0.getOperand(0), N0.getOperand(1),
11551                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11552                                      N0.getOperand(2).getOperand(0),
11553                                      N0.getOperand(2).getOperand(1),
11554                                      DAG.getNode(ISD::FNEG, SL, VT,
11555                                                  N1), Flags), Flags);
11556     }
11557
11558     // fold (fsub x, (fma y, z, (fmul u, v)))
11559     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
11560     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11561         isContractableFMUL(N1.getOperand(2))) {
11562       SDValue N20 = N1.getOperand(2).getOperand(0);
11563       SDValue N21 = N1.getOperand(2).getOperand(1);
11564       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11565                          DAG.getNode(ISD::FNEG, SL, VT,
11566                                      N1.getOperand(0)),
11567                          N1.getOperand(1),
11568                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11569                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11570                                      N21, N0, Flags), Flags);
11571     }
11572
11573
11574     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11575     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
11576     if (N0.getOpcode() == PreferredFusedOpcode) {
11577       SDValue N02 = N0.getOperand(2);
11578       if (N02.getOpcode() == ISD::FP_EXTEND) {
11579         SDValue N020 = N02.getOperand(0);
11580         if (isContractableFMUL(N020) &&
11581             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11582           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11583                              N0.getOperand(0), N0.getOperand(1),
11584                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11585                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11586                                                      N020.getOperand(0)),
11587                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11588                                                      N020.getOperand(1)),
11589                                          DAG.getNode(ISD::FNEG, SL, VT,
11590                                                      N1), Flags), Flags);
11591         }
11592       }
11593     }
11594
11595     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11596     //   -> (fma (fpext x), (fpext y),
11597     //           (fma (fpext u), (fpext v), (fneg z)))
11598     // FIXME: This turns two single-precision and one double-precision
11599     // operation into two double-precision operations, which might not be
11600     // interesting for all targets, especially GPUs.
11601     if (N0.getOpcode() == ISD::FP_EXTEND) {
11602       SDValue N00 = N0.getOperand(0);
11603       if (N00.getOpcode() == PreferredFusedOpcode) {
11604         SDValue N002 = N00.getOperand(2);
11605         if (isContractableFMUL(N002) &&
11606             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11607           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11608                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11609                                          N00.getOperand(0)),
11610                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11611                                          N00.getOperand(1)),
11612                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11613                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11614                                                      N002.getOperand(0)),
11615                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11616                                                      N002.getOperand(1)),
11617                                          DAG.getNode(ISD::FNEG, SL, VT,
11618                                                      N1), Flags), Flags);
11619         }
11620       }
11621     }
11622
11623     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11624     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11625     if (N1.getOpcode() == PreferredFusedOpcode &&
11626         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11627       SDValue N120 = N1.getOperand(2).getOperand(0);
11628       if (isContractableFMUL(N120) &&
11629           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11630         SDValue N1200 = N120.getOperand(0);
11631         SDValue N1201 = N120.getOperand(1);
11632         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11633                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11634                            N1.getOperand(1),
11635                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11636                                        DAG.getNode(ISD::FNEG, SL, VT,
11637                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11638                                                                VT, N1200)),
11639                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11640                                                    N1201),
11641                                        N0, Flags), Flags);
11642       }
11643     }
11644
11645     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11646     //   -> (fma (fneg (fpext y)), (fpext z),
11647     //           (fma (fneg (fpext u)), (fpext v), x))
11648     // FIXME: This turns two single-precision and one double-precision
11649     // operation into two double-precision operations, which might not be
11650     // interesting for all targets, especially GPUs.
11651     if (N1.getOpcode() == ISD::FP_EXTEND &&
11652         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11653       SDValue CvtSrc = N1.getOperand(0);
11654       SDValue N100 = CvtSrc.getOperand(0);
11655       SDValue N101 = CvtSrc.getOperand(1);
11656       SDValue N102 = CvtSrc.getOperand(2);
11657       if (isContractableFMUL(N102) &&
11658           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11659         SDValue N1020 = N102.getOperand(0);
11660         SDValue N1021 = N102.getOperand(1);
11661         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11662                            DAG.getNode(ISD::FNEG, SL, VT,
11663                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11664                                                    N100)),
11665                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11666                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11667                                        DAG.getNode(ISD::FNEG, SL, VT,
11668                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11669                                                                VT, N1020)),
11670                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11671                                                    N1021),
11672                                        N0, Flags), Flags);
11673       }
11674     }
11675   }
11676
11677   return SDValue();
11678 }
11679
11680 /// Try to perform FMA combining on a given FMUL node based on the distributive
11681 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11682 /// subtraction instead of addition).
      ///
      /// The combine is skipped unless TargetOptions::NoInfsFPMath is set and either
      /// FMA or FMAD is permitted and available for the node's value type.
      ///
      /// \returns the fused node, or an empty SDValue if no pattern matched.
11683 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11684   SDValue N0 = N->getOperand(0);
11685   SDValue N1 = N->getOperand(1);
11686   EVT VT = N->getValueType(0);
11687   SDLoc SL(N);
11688   const SDNodeFlags Flags = N->getFlags();
11689
11690   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11691
11692   const TargetOptions &Options = DAG.getTarget().Options;
11693
11694   // The transforms below are incorrect when x == 0 and y == inf, because the
11695   // intermediate multiplication produces a nan.
11696   if (!Options.NoInfsFPMath)
11697     return SDValue();
11698
11699   // Floating-point multiply-add without intermediate rounding.
        // Requires fast fusion or unsafe math, a target that reports FMA as faster
        // than FMUL + FADD, and (once operations are legalized) a legal or custom
        // FMA for this type.
11700   bool HasFMA =
11701       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11702       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11703       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11704
11705   // Floating-point multiply-add with intermediate rounding. This can result
11706   // in a less precise result due to the changed rounding order.
        // Only considered under unsafe math, after operation legalization, and
        // when FMAD is legal for this type.
11707   bool HasFMAD = Options.UnsafeFPMath &&
11708                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11709
11710   // No valid opcode, do not combine.
11711   if (!HasFMAD && !HasFMA)
11712     return SDValue();
11713
11714   // Always prefer FMAD to FMA for precision.
11715   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
        // With aggressive fusion the single-use requirement on the inner
        // FADD/FSUB (checked in the lambdas below) is waived.
11716   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11717
11718   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11719   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
        // X is the candidate FADD operand of the FMUL, Y the other multiplicand.
        // Returns an empty SDValue when X does not match.
11720   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11721     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11722       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11723         if (C->isExactlyValue(+1.0))
11724           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11725                              Y, Flags);
11726         if (C->isExactlyValue(-1.0))
11727           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11728                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11729       }
11730     }
11731     return SDValue();
11732   };
11733
        // The FADD may be either multiplicand, so try both operand orders.
11734   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11735     return FMA;
11736   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11737     return FMA;
11738
11739   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11740   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11741   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11742   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
        // X is the candidate FSUB operand of the FMUL, Y the other multiplicand.
        // The constant is looked for first in the FSUB's left operand, then in
        // its right operand.
11743   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11744     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11745       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11746         if (C0->isExactlyValue(+1.0))
11747           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11748                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11749                              Y, Flags);
11750         if (C0->isExactlyValue(-1.0))
11751           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11752                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11753                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11754       }
11755       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11756         if (C1->isExactlyValue(+1.0))
11757           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11758                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11759         if (C1->isExactlyValue(-1.0))
11760           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11761                              Y, Flags);
11762       }
11763     }
11764     return SDValue();
11765   };
11766
        // As above, the FSUB may be either multiplicand.
11767   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11768     return FMA;
11769   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11770     return FMA;
11771
11772   return SDValue();
11773 }
11774
/// Combine an FADD node.
///
/// Folds are tried in this order: vector simplification, constant folding and
/// canonicalization of a constant to the RHS, the zero identity, folding into
/// a select, conversion to FSUB when an operand is strictly cheaper negated,
/// the (fmul B, -2.0) special case, nnan-gated cancellation, folds gated on
/// unsafe-math/reassociation together with no-signed-zeros, and finally
/// FADD -> FMA fusion.
///
/// \returns the replacement value, or an empty SDValue if no fold applied.
11775 SDValue DAGCombiner::visitFADD(SDNode *N) {
11776   SDValue N0 = N->getOperand(0);
11777   SDValue N1 = N->getOperand(1);
11778   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11779   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11780   EVT VT = N->getValueType(0);
11781   SDLoc DL(N);
11782   const TargetOptions &Options = DAG.getTarget().Options;
11783   const SDNodeFlags Flags = N->getFlags();
11784
11785   // fold vector ops
11786   if (VT.isVector())
11787     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11788       return FoldedVOp;
11789
11790   // fold (fadd c1, c2) -> c1 + c2
11791   if (N0CFP && N1CFP)
11792     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11793
11794   // canonicalize constant to RHS
11795   if (N0CFP && !N1CFP)
11796     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11797
11798   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
        // "fast-math" here means the global NoSignedZerosFPMath option or the
        // node's nsz flag.
11799   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11800   if (N1C && N1C->isZero())
11801     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
11802       return N0;
11803
11804   if (SDValue NewSel = foldBinOpIntoSelect(N))
11805     return NewSel;
11806
11807   // fold (fadd A, (fneg B)) -> (fsub A, B)
        // Only done when isNegatibleForFree returns 2, the value that
        // isCheaperToUseNegatedFPOps treats as "strictly cheaper when negated".
11808   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11809       TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
11810     return DAG.getNode(
11811         ISD::FSUB, DL, VT, N0,
11812         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
11813
11814   // fold (fadd (fneg A), B) -> (fsub B, A)
11815   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11816       TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
11817     return DAG.getNode(
11818         ISD::FSUB, DL, VT, N1,
11819         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
11820
        // Matches a single-use FMUL whose second operand is exactly -2.0.
11821   auto isFMulNegTwo = [](SDValue FMul) {
11822     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11823       return false;
11824     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11825     return C && C->isExactlyValue(-2.0);
11826   };
11827
11828   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11829   if (isFMulNegTwo(N0)) {
11830     SDValue B = N0.getOperand(0);
11831     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11832     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11833   }
11834   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11835   if (isFMulNegTwo(N1)) {
11836     SDValue B = N1.getOperand(0);
11837     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11838     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11839   }
11840
11841   // No FP constant should be created after legalization as Instruction
11842   // Selection pass has a hard time dealing with FP constants.
11843   bool AllowNewConst = (Level < AfterLegalizeDAG);
11844
11845   // If nnan is enabled, fold lots of things.
11846   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11847     // If allowed, fold (fadd (fneg x), x) -> 0.0
11848     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11849       return DAG.getConstantFP(0.0, DL, VT);
11850
11851     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11852     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11853       return DAG.getConstantFP(0.0, DL, VT);
11854   }
11855
11856   // If 'unsafe math' or reassoc and nsz, fold lots of things.
11857   // TODO: break out portions of the transformations below for which Unsafe is
11858   //       considered and which do not require both nsz and reassoc
        // Every fold in this block may create a new FP constant, hence the
        // AllowNewConst requirement.
11859   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
11860        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11861       AllowNewConst) {
11862     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11863     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11864         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11865       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11866       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11867     }
11868
11869     // We can fold chains of FADD's of the same value into multiplications.
11870     // This transform is not safe in general because we are reducing the number
11871     // of rounding steps.
11872     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11873       if (N0.getOpcode() == ISD::FMUL) {
11874         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11875         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11876
11877         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11878         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11879           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11880                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11881           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11882         }
11883
11884         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11885         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11886             N1.getOperand(0) == N1.getOperand(1) &&
11887             N0.getOperand(0) == N1.getOperand(0)) {
11888           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11889                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11890           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11891         }
11892       }
11893
            // Mirror images of the two folds above, with the FMUL on the RHS.
11894       if (N1.getOpcode() == ISD::FMUL) {
11895         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11896         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11897
11898         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11899         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11900           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11901                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11902           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11903         }
11904
11905         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11906         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11907             N0.getOperand(0) == N0.getOperand(1) &&
11908             N1.getOperand(0) == N0.getOperand(0)) {
11909           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11910                                        DAG.getConstantFP(2.0, DL, VT), Flags);
11911           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11912         }
11913       }
11914
11915       if (N0.getOpcode() == ISD::FADD) {
11916         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11917         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11918         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11919             (N0.getOperand(0) == N1)) {
11920           return DAG.getNode(ISD::FMUL, DL, VT,
11921                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11922         }
11923       }
11924
11925       if (N1.getOpcode() == ISD::FADD) {
11926         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11927         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11928         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11929             N1.getOperand(0) == N0) {
11930           return DAG.getNode(ISD::FMUL, DL, VT,
11931                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11932         }
11933       }
11934
11935       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11936       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11937           N0.getOperand(0) == N0.getOperand(1) &&
11938           N1.getOperand(0) == N1.getOperand(1) &&
11939           N0.getOperand(0) == N1.getOperand(0)) {
11940         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11941                            DAG.getConstantFP(4.0, DL, VT), Flags);
11942       }
11943     }
11944   } // enable-unsafe-fp-math
11945
11946   // FADD -> FMA combines:
        // The fused node is queued on the worklist before being returned.
11947   if (SDValue Fused = visitFADDForFMACombine(N)) {
11948     AddToWorklist(Fused.getNode());
11949     return Fused;
11950   }
11951   return SDValue();
11952 }
11953
/// Combine an FSUB node.
///
/// Folds are tried in this order: vector simplification, constant folding,
/// folding into a select, subtraction of zero, x - x under nnan, subtraction
/// from zero (negation), X - (X + Y) style cancellation, conversion to FADD
/// when the RHS can be negated for free, and finally FSUB -> FMA fusion.
///
/// \returns the replacement value, or an empty SDValue if no fold applied.
11954 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11955   SDValue N0 = N->getOperand(0);
11956   SDValue N1 = N->getOperand(1);
11957   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11958   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11959   EVT VT = N->getValueType(0);
11960   SDLoc DL(N);
11961   const TargetOptions &Options = DAG.getTarget().Options;
11962   const SDNodeFlags Flags = N->getFlags();
11963
11964   // fold vector ops
11965   if (VT.isVector())
11966     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11967       return FoldedVOp;
11968
11969   // fold (fsub c1, c2) -> c1-c2
11970   if (N0CFP && N1CFP)
11971     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11972
11973   if (SDValue NewSel = foldBinOpIntoSelect(N))
11974     return NewSel;
11975
11976   // (fsub A, 0) -> A
        // Always applied for +0.0; for -0.0 only when signed zeros may be ignored
        // (global option or the node's nsz flag).
11977   if (N1CFP && N1CFP->isZero()) {
11978     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
11979         Flags.hasNoSignedZeros()) {
11980       return N0;
11981     }
11982   }
11983
11984   if (N0 == N1) {
11985     // (fsub x, x) -> 0.0
11986     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
11987       return DAG.getConstantFP(0.0f, DL, VT);
11988   }
11989
11990   // (fsub -0.0, N1) -> -N1
11991   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
11992   //       FSUB does not specify the sign bit of a NaN. Also note that for
11993   //       the same reason, the inverse transform is not safe, unless fast math
11994   //       flags are in play.
        // A +0.0 LHS is accepted as well when signed zeros may be ignored.
11995   if (N0CFP && N0CFP->isZero()) {
11996     if (N0CFP->isNegative() ||
11997         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
            // Prefer an existing free negated form of N1; otherwise create an
            // FNEG if that is allowed at this stage.
11998       if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
11999         return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12000       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12001         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12002     }
12003   }
12004
        // Cancellation folds: gated on (global unsafe math + nsz) or the node's
        // (reassoc + nsz) flags, and on N1 being an FADD.
12005   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12006        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12007       N1.getOpcode() == ISD::FADD) {
12008     // X - (X + Y) -> -Y
12009     if (N0 == N1->getOperand(0))
12010       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12011     // X - (Y + X) -> -Y
12012     if (N0 == N1->getOperand(1))
12013       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12014   }
12015
12016   // fold (fsub A, (fneg B)) -> (fadd A, B)
        // Any non-zero isNegatibleForFree result is accepted here.
12017   if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
12018     return DAG.getNode(
12019         ISD::FADD, DL, VT, N0,
12020         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12021
12022   // FSUB -> FMA combines:
        // The fused node is queued on the worklist before being returned.
12023   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12024     AddToWorklist(Fused.getNode());
12025     return Fused;
12026   }
12027
12028   return SDValue();
12029 }
12030
12031 /// Return true if both inputs are at least as cheap in negated form and at
12032 /// least one input is strictly cheaper in negated form.
12033 bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
12034   if (char LHSNeg =
12035           TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
12036     if (char RHSNeg =
12037             TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
12038       // Both negated operands are at least as cheap as their counterparts.
12039       // Check to see if at least one is cheaper negated.
12040       if (LHSNeg == 2 || RHSNeg == 2)
12041         return true;
12042
12043   return false;
12044 }
12045
/// Combine an FMUL node.
///
/// Folds are tried in this order: vector simplification, constant folding and
/// canonicalization of a constant to the RHS, folding into a select,
/// multiplication by zero (nnan + nsz), constant reassociation (unsafe math
/// or reassoc), multiplication by 2.0 and -1.0, removal of a double negation,
/// the select-of-+/-1.0 to FABS fold, and finally the distributive
/// FMUL -> FMA combine.
///
/// \returns the replacement value, or an empty SDValue if no fold applied.
12046 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12047   SDValue N0 = N->getOperand(0);
12048   SDValue N1 = N->getOperand(1);
12049   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12050   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12051   EVT VT = N->getValueType(0);
12052   SDLoc DL(N);
12053   const TargetOptions &Options = DAG.getTarget().Options;
12054   const SDNodeFlags Flags = N->getFlags();
12055
12056   // fold vector ops
12057   if (VT.isVector()) {
12058     // This just handles C1 * C2 for vectors. Other vector folds are below.
12059     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12060       return FoldedVOp;
12061   }
12062
12063   // fold (fmul c1, c2) -> c1*c2
12064   if (N0CFP && N1CFP)
12065     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12066
12067   // canonicalize constant to RHS
12068   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12069      !isConstantFPBuildVectorOrConstantFP(N1))
12070     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12071
12072   if (SDValue NewSel = foldBinOpIntoSelect(N))
12073     return NewSel;
12074
        // Requires both nnan and nsz, either globally or on this node.
12075   if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12076       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12077     // fold (fmul A, 0) -> 0
12078     if (N1CFP && N1CFP->isZero())
12079       return N1;
12080   }
12081
12082   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12083     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12084     if (isConstantFPBuildVectorOrConstantFP(N1) &&
12085         N0.getOpcode() == ISD::FMUL) {
12086       SDValue N00 = N0.getOperand(0);
12087       SDValue N01 = N0.getOperand(1);
12088       // Avoid an infinite loop by making sure that N00 is not a constant
12089       // (the inner multiply has not been constant folded yet).
12090       if (isConstantFPBuildVectorOrConstantFP(N01) &&
12091           !isConstantFPBuildVectorOrConstantFP(N00)) {
12092         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12093         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12094       }
12095     }
12096
12097     // Match a special-case: we convert X * 2.0 into fadd.
12098     // fmul (fadd X, X), C -> fmul X, 2.0 * C
          // NOTE(review): the condition below does not check that N1 is a
          // constant, although the pattern above writes it as C - confirm that
          // this is intended.
12099     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12100         N0.getOperand(0) == N0.getOperand(1)) {
12101       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12102       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12103       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12104     }
12105   }
12106
12107   // fold (fmul X, 2.0) -> (fadd X, X)
12108   if (N1CFP && N1CFP->isExactlyValue(+2.0))
12109     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12110
12111   // fold (fmul X, -1.0) -> (fneg X)
        // Note that the node's flags are not passed to the new FNEG.
12112   if (N1CFP && N1CFP->isExactlyValue(-1.0))
12113     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12114       return DAG.getNode(ISD::FNEG, DL, VT, N0);
12115
12116   // -N0 * -N1 --> N0 * N1
12117   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12118     SDValue NegN0 =
12119         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12120     SDValue NegN1 =
12121         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12122     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12123   }
12124
12125   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12126   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
        // Requires the node's own nnan and nsz flags (the global options are not
        // consulted here) and a legal FABS.
12127   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12128       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12129       TLI.isOperationLegal(ISD::FABS, VT)) {
          // Normalize so that Select names the SELECT operand and X the other.
12130     SDValue Select = N0, X = N1;
12131     if (Select.getOpcode() != ISD::SELECT)
12132       std::swap(Select, X);
12133
12134     SDValue Cond = Select.getOperand(0);
12135     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12136     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12137
          // Both select arms must be FP constants and the condition must be a
          // SETCC comparing X against the constant 0.0.
12138     if (TrueOpnd && FalseOpnd &&
12139         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12140         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12141         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12142       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12143       switch (CC) {
12144       default: break;
            // Less-than family: swap the arms so the greater-than handling
            // below can be shared.
12145       case ISD::SETOLT:
12146       case ISD::SETULT:
12147       case ISD::SETOLE:
12148       case ISD::SETULE:
12149       case ISD::SETLT:
12150       case ISD::SETLE:
12151         std::swap(TrueOpnd, FalseOpnd);
12152         LLVM_FALLTHROUGH;
12153       case ISD::SETOGT:
12154       case ISD::SETUGT:
12155       case ISD::SETOGE:
12156       case ISD::SETUGE:
12157       case ISD::SETGT:
12158       case ISD::SETGE:
12159         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12160             TLI.isOperationLegal(ISD::FNEG, VT))
12161           return DAG.getNode(ISD::FNEG, DL, VT,
12162                    DAG.getNode(ISD::FABS, DL, VT, X));
12163         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12164           return DAG.getNode(ISD::FABS, DL, VT, X);
12165
12166         break;
12167       }
12168     }
12169   }
12170
12171   // FMUL -> FMA combines:
        // The fused node is queued on the worklist before being returned.
12172   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12173     AddToWorklist(Fused.getNode());
12174     return Fused;
12175   }
12176
12177   return SDValue();
12178 }
12179
12180 SDValue DAGCombiner::visitFMA(SDNode *N) {
12181   SDValue N0 = N->getOperand(0);
12182   SDValue N1 = N->getOperand(1);
12183   SDValue N2 = N->getOperand(2);
12184   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12185   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12186   EVT VT = N->getValueType(0);
12187   SDLoc DL(N);
12188   const TargetOptions &Options = DAG.getTarget().Options;
12189
12190   // FMA nodes have flags that propagate to the created nodes.
12191   const SDNodeFlags Flags = N->getFlags();
12192   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12193
12194   // Constant fold FMA.
12195   if (isa<ConstantFPSDNode>(N0) &&
12196       isa<ConstantFPSDNode>(N1) &&
12197       isa<ConstantFPSDNode>(N2)) {
12198     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12199   }
12200
12201   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
12202   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12203     SDValue NegN0 =
12204         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12205     SDValue NegN1 =
12206         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12207     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
12208   }
12209
12210   if (UnsafeFPMath) {
12211     if (N0CFP && N0CFP->isZero())
12212       return N2;
12213     if (N1CFP && N1CFP->isZero())
12214       return N2;
12215   }
12216   // TODO: The FMA node should have flags that propagate to these nodes.
12217   if (N0CFP && N0CFP->isExactlyValue(1.0))
12218     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12219   if (N1CFP && N1CFP->isExactlyValue(1.0))
12220     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12221
12222   // Canonicalize (fma c, x, y) -> (fma x, c, y)
12223   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12224      !isConstantFPBuildVectorOrConstantFP(N1))
12225     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12226
12227   if (UnsafeFPMath) {
12228     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12229     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
12230         isConstantFPBuildVectorOrConstantFP(N1) &&
12231         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
12232       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12233                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12234                                      Flags), Flags);
12235     }
12236
12237     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12238     if (N0.getOpcode() == ISD::FMUL &&
12239         isConstantFPBuildVectorOrConstantFP(N1) &&
12240         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12241       return DAG.getNode(ISD::FMA, DL, VT,
12242                          N0.getOperand(0),
12243                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12244                                      Flags),
12245                          N2);
12246     }
12247   }
12248
12249   // (fma x, 1, y) -> (fadd x, y)
12250   // (fma x, -1, y) -> (fadd (fneg x), y)
12251   if (N1CFP) {
12252     if (N1CFP->isExactlyValue(1.0))
12253       // TODO: The FMA node should have flags that propagate to this node.
12254       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12255
12256     if (N1CFP->isExactlyValue(-1.0) &&
12257         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12258       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12259       AddToWorklist(RHSNeg.getNode());
12260       // TODO: The FMA node should have flags that propagate to this node.
12261       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12262     }
12263
12264     // fma (fneg x), K, y -> fma x -K, y
12265     if (N0.getOpcode() == ISD::FNEG &&
12266         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12267          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12268                                               ForCodeSize)))) {
12269       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12270                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12271     }
12272   }
12273
12274   if (UnsafeFPMath) {
12275     // (fma x, c, x) -> (fmul x, (c+1))
12276     if (N1CFP && N0 == N2) {
12277       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12278                          DAG.getNode(ISD::FADD, DL, VT, N1,
12279                                      DAG.getConstantFP(1.0, DL, VT), Flags),
12280                          Flags);
12281     }
12282
12283     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12284     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12285       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12286                          DAG.getNode(ISD::FADD, DL, VT, N1,
12287                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
12288                          Flags);
12289     }
12290   }
12291
12292   return SDValue();
12293 }
12294
12295 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12296 // reciprocal.
12297 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12298 // Notice that this is not always beneficial. One reason is different targets
12299 // may have different costs for FDIV and FMUL, so sometimes the cost of two
12300 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12301 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12302 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12303   // TODO: Limit this transform based on optsize/minsize - it always creates at
12304   //       least 1 extra instruction. But the perf win may be substantial enough
12305   //       that only minsize should restrict this.
12306   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12307   const SDNodeFlags Flags = N->getFlags();
12308   if (!UnsafeMath && !Flags.hasAllowReciprocal())
12309     return SDValue();
12310
12311   // Skip if current node is a reciprocal/fneg-reciprocal.
12312   SDValue N0 = N->getOperand(0);
12313   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12314   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12315     return SDValue();
12316
12317   // Exit early if the target does not want this transform or if there can't
12318   // possibly be enough uses of the divisor to make the transform worthwhile.
12319   SDValue N1 = N->getOperand(1);
12320   unsigned MinUses = TLI.combineRepeatedFPDivisors();
12321
12322   // For splat vectors, scale the number of uses by the splat factor. If we can
12323   // convert the division into a scalar op, that will likely be much faster.
12324   unsigned NumElts = 1;
12325   EVT VT = N->getValueType(0);
12326   if (VT.isVector() && DAG.isSplatValue(N1))
12327     NumElts = VT.getVectorNumElements();
12328
12329   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12330     return SDValue();
12331
12332   // Find all FDIV users of the same divisor.
12333   // Use a set because duplicates may be present in the user list.
12334   SetVector<SDNode *> Users;
12335   for (auto *U : N1->uses()) {
12336     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12337       // This division is eligible for optimization only if global unsafe math
12338       // is enabled or if this division allows reciprocal formation.
12339       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12340         Users.insert(U);
12341     }
12342   }
12343
12344   // Now that we have the actual number of divisor uses, make sure it meets
12345   // the minimum threshold specified by the target.
12346   if ((Users.size() * NumElts) < MinUses)
12347     return SDValue();
12348
12349   SDLoc DL(N);
12350   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12351   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12352
12353   // Dividend / Divisor -> Dividend * Reciprocal
12354   for (auto *U : Users) {
12355     SDValue Dividend = U->getOperand(0);
12356     if (Dividend != FPOne) {
12357       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12358                                     Reciprocal, Flags);
12359       CombineTo(U, NewNode);
12360     } else if (U != Reciprocal.getNode()) {
12361       // In the absence of fast-math-flags, this user node is always the
12362       // same node as Reciprocal, but with FMF they may be different nodes.
12363       CombineTo(U, Reciprocal);
12364     }
12365   }
12366   return SDValue(N, 0);  // N was replaced.
12367 }
12368
12369 SDValue DAGCombiner::visitFDIV(SDNode *N) {
12370   SDValue N0 = N->getOperand(0);
12371   SDValue N1 = N->getOperand(1);
12372   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12373   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12374   EVT VT = N->getValueType(0);
12375   SDLoc DL(N);
12376   const TargetOptions &Options = DAG.getTarget().Options;
12377   SDNodeFlags Flags = N->getFlags();
12378
12379   // fold vector ops
12380   if (VT.isVector())
12381     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12382       return FoldedVOp;
12383
12384   // fold (fdiv c1, c2) -> c1/c2
12385   if (N0CFP && N1CFP)
12386     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12387
12388   if (SDValue NewSel = foldBinOpIntoSelect(N))
12389     return NewSel;
12390
12391   if (SDValue V = combineRepeatedFPDivisors(N))
12392     return V;
12393
12394   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12395     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12396     if (N1CFP) {
12397       // Compute the reciprocal 1.0 / c2.
12398       const APFloat &N1APF = N1CFP->getValueAPF();
12399       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12400       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12401       // Only do the transform if the reciprocal is a legal fp immediate that
12402       // isn't too nasty (eg NaN, denormal, ...).
12403       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12404           (!LegalOperations ||
12405            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12406            // backend)... we should handle this gracefully after Legalize.
12407            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12408            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12409            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12410         return DAG.getNode(ISD::FMUL, DL, VT, N0,
12411                            DAG.getConstantFP(Recip, DL, VT), Flags);
12412     }
12413
12414     // If this FDIV is part of a reciprocal square root, it may be folded
12415     // into a target-specific square root estimate instruction.
12416     if (N1.getOpcode() == ISD::FSQRT) {
12417       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
12418         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12419     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12420                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12421       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12422                                           Flags)) {
12423         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12424         AddToWorklist(RV.getNode());
12425         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12426       }
12427     } else if (N1.getOpcode() == ISD::FP_ROUND &&
12428                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12429       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12430                                           Flags)) {
12431         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12432         AddToWorklist(RV.getNode());
12433         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12434       }
12435     } else if (N1.getOpcode() == ISD::FMUL) {
12436       // Look through an FMUL. Even though this won't remove the FDIV directly,
12437       // it's still worthwhile to get rid of the FSQRT if possible.
12438       SDValue SqrtOp;
12439       SDValue OtherOp;
12440       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12441         SqrtOp = N1.getOperand(0);
12442         OtherOp = N1.getOperand(1);
12443       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12444         SqrtOp = N1.getOperand(1);
12445         OtherOp = N1.getOperand(0);
12446       }
12447       if (SqrtOp.getNode()) {
12448         // We found a FSQRT, so try to make this fold:
12449         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12450         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12451           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12452           AddToWorklist(RV.getNode());
12453           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12454         }
12455       }
12456     }
12457
12458     // Fold into a reciprocal estimate and multiply instead of a real divide.
12459     if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
12460       return RV;
12461   }
12462
12463   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12464   if (isCheaperToUseNegatedFPOps(N0, N1))
12465     return DAG.getNode(
12466         ISD::FDIV, SDLoc(N), VT,
12467         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
12468         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12469
12470   return SDValue();
12471 }
12472
12473 SDValue DAGCombiner::visitFREM(SDNode *N) {
12474   SDValue N0 = N->getOperand(0);
12475   SDValue N1 = N->getOperand(1);
12476   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12477   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12478   EVT VT = N->getValueType(0);
12479
12480   // fold (frem c1, c2) -> fmod(c1,c2)
12481   if (N0CFP && N1CFP)
12482     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12483
12484   if (SDValue NewSel = foldBinOpIntoSelect(N))
12485     return NewSel;
12486
12487   return SDValue();
12488 }
12489
12490 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12491   SDNodeFlags Flags = N->getFlags();
12492   if (!DAG.getTarget().Options.UnsafeFPMath &&
12493       !Flags.hasApproximateFuncs())
12494     return SDValue();
12495
12496   SDValue N0 = N->getOperand(0);
12497   if (TLI.isFsqrtCheap(N0, DAG))
12498     return SDValue();
12499
12500   // FSQRT nodes have flags that propagate to the created nodes.
12501   return buildSqrtEstimate(N0, Flags);
12502 }
12503
12504 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12505 /// copysign(x, fp_round(y)) -> copysign(x, y)
12506 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12507   SDValue N1 = N->getOperand(1);
12508   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12509        N1.getOpcode() == ISD::FP_ROUND)) {
12510     // Do not optimize out type conversion of f128 type yet.
12511     // For some targets like x86_64, configuration is changed to keep one f128
12512     // value in one SSE register, but instruction selection cannot handle
12513     // FCOPYSIGN on SSE registers yet.
12514     EVT N1VT = N1->getValueType(0);
12515     EVT N1Op0VT = N1->getOperand(0).getValueType();
12516     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12517   }
12518   return false;
12519 }
12520
12521 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12522   SDValue N0 = N->getOperand(0);
12523   SDValue N1 = N->getOperand(1);
12524   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12525   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12526   EVT VT = N->getValueType(0);
12527
12528   if (N0CFP && N1CFP) // Constant fold
12529     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12530
12531   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12532     const APFloat &V = N1C->getValueAPF();
12533     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
12534     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12535     if (!V.isNegative()) {
12536       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12537         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12538     } else {
12539       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12540         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12541                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12542     }
12543   }
12544
12545   // copysign(fabs(x), y) -> copysign(x, y)
12546   // copysign(fneg(x), y) -> copysign(x, y)
12547   // copysign(copysign(x,z), y) -> copysign(x, y)
12548   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12549       N0.getOpcode() == ISD::FCOPYSIGN)
12550     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12551
12552   // copysign(x, abs(y)) -> abs(x)
12553   if (N1.getOpcode() == ISD::FABS)
12554     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12555
12556   // copysign(x, copysign(y,z)) -> copysign(x, z)
12557   if (N1.getOpcode() == ISD::FCOPYSIGN)
12558     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12559
12560   // copysign(x, fp_extend(y)) -> copysign(x, y)
12561   // copysign(x, fp_round(y)) -> copysign(x, y)
12562   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12563     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12564
12565   return SDValue();
12566 }
12567
12568 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12569   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12570   if (!ExponentC)
12571     return SDValue();
12572
12573   // Try to convert x ** (1/3) into cube root.
12574   // TODO: Handle the various flavors of long double.
12575   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12576   //       Some range near 1/3 should be fine.
12577   EVT VT = N->getValueType(0);
12578   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12579       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12580     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12581     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12582     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
12583     // For regular numbers, rounding may cause the results to differ.
12584     // Therefore, we require { nsz ninf nnan afn } for this transform.
12585     // TODO: We could select out the special cases if we don't have nsz/ninf.
12586     SDNodeFlags Flags = N->getFlags();
12587     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12588         !Flags.hasApproximateFuncs())
12589       return SDValue();
12590
12591     // Do not create a cbrt() libcall if the target does not have it, and do not
12592     // turn a pow that has lowering support into a cbrt() libcall.
12593     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12594         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12595          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12596       return SDValue();
12597
12598     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12599   }
12600
12601   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12602   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12603   // TODO: This could be extended (using a target hook) to handle smaller
12604   // power-of-2 fractional exponents.
12605   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12606   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12607   if (ExponentIs025 || ExponentIs075) {
12608     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12609     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12610     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12611     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12612     // For regular numbers, rounding may cause the results to differ.
12613     // Therefore, we require { nsz ninf afn } for this transform.
12614     // TODO: We could select out the special cases if we don't have nsz/ninf.
12615     SDNodeFlags Flags = N->getFlags();
12616
12617     // We only need no signed zeros for the 0.25 case.
12618     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12619         !Flags.hasApproximateFuncs())
12620       return SDValue();
12621
12622     // Don't double the number of libcalls. We are trying to inline fast code.
12623     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12624       return SDValue();
12625
12626     // Assume that libcalls are the smallest code.
12627     // TODO: This restriction should probably be lifted for vectors.
12628     if (DAG.getMachineFunction().getFunction().hasOptSize())
12629       return SDValue();
12630
12631     // pow(X, 0.25) --> sqrt(sqrt(X))
12632     SDLoc DL(N);
12633     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12634     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12635     if (ExponentIs025)
12636       return SqrtSqrt;
12637     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12638     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12639   }
12640
12641   return SDValue();
12642 }
12643
12644 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12645                                const TargetLowering &TLI) {
12646   // This optimization is guarded by a function attribute because it may produce
12647   // unexpected results. Ie, programs may be relying on the platform-specific
12648   // undefined behavior when the float-to-int conversion overflows.
12649   const Function &F = DAG.getMachineFunction().getFunction();
12650   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12651   if (StrictOverflow.getValueAsString().equals("false"))
12652     return SDValue();
12653
12654   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12655   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12656   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12657   // conversions would return +0.0.
12658   // FIXME: We should be able to use node-level FMF here.
12659   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12660   EVT VT = N->getValueType(0);
12661   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12662       !DAG.getTarget().Options.NoSignedZerosFPMath)
12663     return SDValue();
12664
12665   // fptosi/fptoui round towards zero, so converting from FP to integer and
12666   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12667   SDValue N0 = N->getOperand(0);
12668   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12669       N0.getOperand(0).getValueType() == VT)
12670     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12671
12672   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12673       N0.getOperand(0).getValueType() == VT)
12674     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12675
12676   return SDValue();
12677 }
12678
12679 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12680   SDValue N0 = N->getOperand(0);
12681   EVT VT = N->getValueType(0);
12682   EVT OpVT = N0.getValueType();
12683
12684   // [us]itofp(undef) = 0, because the result value is bounded.
12685   if (N0.isUndef())
12686     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12687
12688   // fold (sint_to_fp c1) -> c1fp
12689   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12690       // ...but only if the target supports immediate floating-point values
12691       (!LegalOperations ||
12692        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12693     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12694
12695   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12696   // but UINT_TO_FP is legal on this target, try to convert.
12697   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12698       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12699     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12700     if (DAG.SignBitIsZero(N0))
12701       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12702   }
12703
12704   // The next optimizations are desirable only if SELECT_CC can be lowered.
12705   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12706     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12707     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12708         !VT.isVector() &&
12709         (!LegalOperations ||
12710          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12711       SDLoc DL(N);
12712       SDValue Ops[] =
12713         { N0.getOperand(0), N0.getOperand(1),
12714           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12715           N0.getOperand(2) };
12716       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12717     }
12718
12719     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12720     //      (select_cc x, y, 1.0, 0.0,, cc)
12721     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12722         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12723         (!LegalOperations ||
12724          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12725       SDLoc DL(N);
12726       SDValue Ops[] =
12727         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12728           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12729           N0.getOperand(0).getOperand(2) };
12730       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12731     }
12732   }
12733
12734   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12735     return FTrunc;
12736
12737   return SDValue();
12738 }
12739
12740 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12741   SDValue N0 = N->getOperand(0);
12742   EVT VT = N->getValueType(0);
12743   EVT OpVT = N0.getValueType();
12744
12745   // [us]itofp(undef) = 0, because the result value is bounded.
12746   if (N0.isUndef())
12747     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12748
12749   // fold (uint_to_fp c1) -> c1fp
12750   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12751       // ...but only if the target supports immediate floating-point values
12752       (!LegalOperations ||
12753        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12754     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12755
12756   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12757   // but SINT_TO_FP is legal on this target, try to convert.
12758   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12759       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12760     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12761     if (DAG.SignBitIsZero(N0))
12762       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12763   }
12764
12765   // The next optimizations are desirable only if SELECT_CC can be lowered.
12766   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12767     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12768     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12769         (!LegalOperations ||
12770          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12771       SDLoc DL(N);
12772       SDValue Ops[] =
12773         { N0.getOperand(0), N0.getOperand(1),
12774           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12775           N0.getOperand(2) };
12776       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12777     }
12778   }
12779
12780   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12781     return FTrunc;
12782
12783   return SDValue();
12784 }
12785
12786 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12787 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12788   SDValue N0 = N->getOperand(0);
12789   EVT VT = N->getValueType(0);
12790
12791   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12792     return SDValue();
12793
12794   SDValue Src = N0.getOperand(0);
12795   EVT SrcVT = Src.getValueType();
12796   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12797   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12798
12799   // We can safely assume the conversion won't overflow the output range,
12800   // because (for example) (uint8_t)18293.f is undefined behavior.
12801
12802   // Since we can assume the conversion won't overflow, our decision as to
12803   // whether the input will fit in the float should depend on the minimum
12804   // of the input range and output range.
12805
12806   // This means this is also safe for a signed input and unsigned output, since
12807   // a negative input would lead to undefined behavior.
12808   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12809   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12810   unsigned ActualSize = std::min(InputSize, OutputSize);
12811   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12812
12813   // We can only fold away the float conversion if the input range can be
12814   // represented exactly in the float range.
12815   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12816     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12817       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12818                                                        : ISD::ZERO_EXTEND;
12819       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12820     }
12821     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12822       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12823     return DAG.getBitcast(VT, Src);
12824   }
12825   return SDValue();
12826 }
12827
12828 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12829   SDValue N0 = N->getOperand(0);
12830   EVT VT = N->getValueType(0);
12831
12832   // fold (fp_to_sint undef) -> undef
12833   if (N0.isUndef())
12834     return DAG.getUNDEF(VT);
12835
12836   // fold (fp_to_sint c1fp) -> c1
12837   if (isConstantFPBuildVectorOrConstantFP(N0))
12838     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12839
12840   return FoldIntToFPToInt(N, DAG);
12841 }
12842
12843 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12844   SDValue N0 = N->getOperand(0);
12845   EVT VT = N->getValueType(0);
12846
12847   // fold (fp_to_uint undef) -> undef
12848   if (N0.isUndef())
12849     return DAG.getUNDEF(VT);
12850
12851   // fold (fp_to_uint c1fp) -> c1
12852   if (isConstantFPBuildVectorOrConstantFP(N0))
12853     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12854
12855   return FoldIntToFPToInt(N, DAG);
12856 }
12857
12858 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12859   SDValue N0 = N->getOperand(0);
12860   SDValue N1 = N->getOperand(1);
12861   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12862   EVT VT = N->getValueType(0);
12863
12864   // fold (fp_round c1fp) -> c1fp
12865   if (N0CFP)
12866     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12867
12868   // fold (fp_round (fp_extend x)) -> x
12869   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12870     return N0.getOperand(0);
12871
12872   // fold (fp_round (fp_round x)) -> (fp_round x)
12873   if (N0.getOpcode() == ISD::FP_ROUND) {
12874     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12875     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12876
12877     // Skip this folding if it results in an fp_round from f80 to f16.
12878     //
12879     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12880     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12881     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12882     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12883     // x86.
12884     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12885       return SDValue();
12886
12887     // If the first fp_round isn't a value preserving truncation, it might
12888     // introduce a tie in the second fp_round, that wouldn't occur in the
12889     // single-step fp_round we want to fold to.
12890     // In other words, double rounding isn't the same as rounding.
12891     // Also, this is a value preserving truncation iff both fp_round's are.
12892     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12893       SDLoc DL(N);
12894       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12895                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12896     }
12897   }
12898
12899   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12900   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12901     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12902                               N0.getOperand(0), N1);
12903     AddToWorklist(Tmp.getNode());
12904     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12905                        Tmp, N0.getOperand(1));
12906   }
12907
12908   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12909     return NewVSel;
12910
12911   return SDValue();
12912 }
12913
12914 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12915   SDValue N0 = N->getOperand(0);
12916   EVT VT = N->getValueType(0);
12917
12918   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
12919   if (N->hasOneUse() &&
12920       N->use_begin()->getOpcode() == ISD::FP_ROUND)
12921     return SDValue();
12922
12923   // fold (fp_extend c1fp) -> c1fp
12924   if (isConstantFPBuildVectorOrConstantFP(N0))
12925     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12926
12927   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12928   if (N0.getOpcode() == ISD::FP16_TO_FP &&
12929       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12930     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12931
12932   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
12933   // value of X.
12934   if (N0.getOpcode() == ISD::FP_ROUND
12935       && N0.getConstantOperandVal(1) == 1) {
12936     SDValue In = N0.getOperand(0);
12937     if (In.getValueType() == VT) return In;
12938     if (VT.bitsLT(In.getValueType()))
12939       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12940                          In, N0.getOperand(1));
12941     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12942   }
12943
12944   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12945   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12946        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12947     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12948     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12949                                      LN0->getChain(),
12950                                      LN0->getBasePtr(), N0.getValueType(),
12951                                      LN0->getMemOperand());
12952     CombineTo(N, ExtLoad);
12953     CombineTo(N0.getNode(),
12954               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12955                           N0.getValueType(), ExtLoad,
12956                           DAG.getIntPtrConstant(1, SDLoc(N0))),
12957               ExtLoad.getValue(1));
12958     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
12959   }
12960
12961   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12962     return NewVSel;
12963
12964   return SDValue();
12965 }
12966
12967 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12968   SDValue N0 = N->getOperand(0);
12969   EVT VT = N->getValueType(0);
12970
12971   // fold (fceil c1) -> fceil(c1)
12972   if (isConstantFPBuildVectorOrConstantFP(N0))
12973     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12974
12975   return SDValue();
12976 }
12977
12978 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12979   SDValue N0 = N->getOperand(0);
12980   EVT VT = N->getValueType(0);
12981
12982   // fold (ftrunc c1) -> ftrunc(c1)
12983   if (isConstantFPBuildVectorOrConstantFP(N0))
12984     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12985
12986   // fold ftrunc (known rounded int x) -> x
12987   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
12988   // likely to be generated to extract integer from a rounded floating value.
12989   switch (N0.getOpcode()) {
12990   default: break;
12991   case ISD::FRINT:
12992   case ISD::FTRUNC:
12993   case ISD::FNEARBYINT:
12994   case ISD::FFLOOR:
12995   case ISD::FCEIL:
12996     return N0;
12997   }
12998
12999   return SDValue();
13000 }
13001
13002 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13003   SDValue N0 = N->getOperand(0);
13004   EVT VT = N->getValueType(0);
13005
13006   // fold (ffloor c1) -> ffloor(c1)
13007   if (isConstantFPBuildVectorOrConstantFP(N0))
13008     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13009
13010   return SDValue();
13011 }
13012
13013 // FIXME: FNEG and FABS have a lot in common; refactor.
13014 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13015   SDValue N0 = N->getOperand(0);
13016   EVT VT = N->getValueType(0);
13017
13018   // Constant fold FNEG.
13019   if (isConstantFPBuildVectorOrConstantFP(N0))
13020     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13021
13022   if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
13023     return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13024
13025   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13026   // constant pool values.
13027   if (!TLI.isFNegFree(VT) &&
13028       N0.getOpcode() == ISD::BITCAST &&
13029       N0.getNode()->hasOneUse()) {
13030     SDValue Int = N0.getOperand(0);
13031     EVT IntVT = Int.getValueType();
13032     if (IntVT.isInteger() && !IntVT.isVector()) {
13033       APInt SignMask;
13034       if (N0.getValueType().isVector()) {
13035         // For a vector, get a mask such as 0x80... per scalar element
13036         // and splat it.
13037         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13038         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13039       } else {
13040         // For a scalar, just generate 0x80...
13041         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13042       }
13043       SDLoc DL0(N0);
13044       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13045                         DAG.getConstant(SignMask, DL0, IntVT));
13046       AddToWorklist(Int.getNode());
13047       return DAG.getBitcast(VT, Int);
13048     }
13049   }
13050
13051   // (fneg (fmul c, x)) -> (fmul -c, x)
13052   if (N0.getOpcode() == ISD::FMUL &&
13053       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13054     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13055     if (CFP1) {
13056       APFloat CVal = CFP1->getValueAPF();
13057       CVal.changeSign();
13058       if (Level >= AfterLegalizeDAG &&
13059           (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13060            TLI.isOperationLegal(ISD::ConstantFP, VT)))
13061         return DAG.getNode(
13062             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13063             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13064             N0->getFlags());
13065     }
13066   }
13067
13068   return SDValue();
13069 }
13070
13071 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13072                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13073   SDValue N0 = N->getOperand(0);
13074   SDValue N1 = N->getOperand(1);
13075   EVT VT = N->getValueType(0);
13076   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13077   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13078
13079   if (N0CFP && N1CFP) {
13080     const APFloat &C0 = N0CFP->getValueAPF();
13081     const APFloat &C1 = N1CFP->getValueAPF();
13082     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13083   }
13084
13085   // Canonicalize to constant on RHS.
13086   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13087       !isConstantFPBuildVectorOrConstantFP(N1))
13088     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13089
13090   return SDValue();
13091 }
13092
13093 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13094   return visitFMinMax(DAG, N, minnum);
13095 }
13096
13097 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13098   return visitFMinMax(DAG, N, maxnum);
13099 }
13100
13101 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13102   return visitFMinMax(DAG, N, minimum);
13103 }
13104
13105 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13106   return visitFMinMax(DAG, N, maximum);
13107 }
13108
13109 SDValue DAGCombiner::visitFABS(SDNode *N) {
13110   SDValue N0 = N->getOperand(0);
13111   EVT VT = N->getValueType(0);
13112
13113   // fold (fabs c1) -> fabs(c1)
13114   if (isConstantFPBuildVectorOrConstantFP(N0))
13115     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13116
13117   // fold (fabs (fabs x)) -> (fabs x)
13118   if (N0.getOpcode() == ISD::FABS)
13119     return N->getOperand(0);
13120
13121   // fold (fabs (fneg x)) -> (fabs x)
13122   // fold (fabs (fcopysign x, y)) -> (fabs x)
13123   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13124     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13125
13126   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13127   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13128     SDValue Int = N0.getOperand(0);
13129     EVT IntVT = Int.getValueType();
13130     if (IntVT.isInteger() && !IntVT.isVector()) {
13131       APInt SignMask;
13132       if (N0.getValueType().isVector()) {
13133         // For a vector, get a mask such as 0x7f... per scalar element
13134         // and splat it.
13135         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13136         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13137       } else {
13138         // For a scalar, just generate 0x7f...
13139         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13140       }
13141       SDLoc DL(N0);
13142       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13143                         DAG.getConstant(SignMask, DL, IntVT));
13144       AddToWorklist(Int.getNode());
13145       return DAG.getBitcast(N->getValueType(0), Int);
13146     }
13147   }
13148
13149   return SDValue();
13150 }
13151
13152 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
13153   SDValue Chain = N->getOperand(0);
13154   SDValue N1 = N->getOperand(1);
13155   SDValue N2 = N->getOperand(2);
13156
13157   // If N is a constant we could fold this into a fallthrough or unconditional
13158   // branch. However that doesn't happen very often in normal code, because
13159   // Instcombine/SimplifyCFG should have handled the available opportunities.
13160   // If we did this folding here, it would be necessary to update the
13161   // MachineBasicBlock CFG, which is awkward.
13162
13163   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13164   // on the target.
13165   if (N1.getOpcode() == ISD::SETCC &&
13166       TLI.isOperationLegalOrCustom(ISD::BR_CC,
13167                                    N1.getOperand(0).getValueType())) {
13168     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13169                        Chain, N1.getOperand(2),
13170                        N1.getOperand(0), N1.getOperand(1), N2);
13171   }
13172
13173   if (N1.hasOneUse()) {
13174     if (SDValue NewN1 = rebuildSetCC(N1))
13175       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13176   }
13177
13178   return SDValue();
13179 }
13180
13181 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13182   if (N.getOpcode() == ISD::SRL ||
13183       (N.getOpcode() == ISD::TRUNCATE &&
13184        (N.getOperand(0).hasOneUse() &&
13185         N.getOperand(0).getOpcode() == ISD::SRL))) {
13186     // Look pass the truncate.
13187     if (N.getOpcode() == ISD::TRUNCATE)
13188       N = N.getOperand(0);
13189
13190     // Match this pattern so that we can generate simpler code:
13191     //
13192     //   %a = ...
13193     //   %b = and i32 %a, 2
13194     //   %c = srl i32 %b, 1
13195     //   brcond i32 %c ...
13196     //
13197     // into
13198     //
13199     //   %a = ...
13200     //   %b = and i32 %a, 2
13201     //   %c = setcc eq %b, 0
13202     //   brcond %c ...
13203     //
13204     // This applies only when the AND constant value has one bit set and the
13205     // SRL constant is equal to the log2 of the AND constant. The back-end is
13206     // smart enough to convert the result into a TEST/JMP sequence.
13207     SDValue Op0 = N.getOperand(0);
13208     SDValue Op1 = N.getOperand(1);
13209
13210     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13211       SDValue AndOp1 = Op0.getOperand(1);
13212
13213       if (AndOp1.getOpcode() == ISD::Constant) {
13214         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13215
13216         if (AndConst.isPowerOf2() &&
13217             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13218           SDLoc DL(N);
13219           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13220                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13221                               ISD::SETNE);
13222         }
13223       }
13224     }
13225   }
13226
13227   // Transform br(xor(x, y)) -> br(x != y)
13228   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13229   if (N.getOpcode() == ISD::XOR) {
13230     // Because we may call this on a speculatively constructed
13231     // SimplifiedSetCC Node, we need to simplify this node first.
13232     // Ideally this should be folded into SimplifySetCC and not
13233     // here. For now, grab a handle to N so we don't lose it from
13234     // replacements interal to the visit.
13235     HandleSDNode XORHandle(N);
13236     while (N.getOpcode() == ISD::XOR) {
13237       SDValue Tmp = visitXOR(N.getNode());
13238       // No simplification done.
13239       if (!Tmp.getNode())
13240         break;
13241       // Returning N is form in-visit replacement that may invalidated
13242       // N. Grab value from Handle.
13243       if (Tmp.getNode() == N.getNode())
13244         N = XORHandle.getValue();
13245       else // Node simplified. Try simplifying again.
13246         N = Tmp;
13247     }
13248
13249     if (N.getOpcode() != ISD::XOR)
13250       return N;
13251
13252     SDNode *TheXor = N.getNode();
13253
13254     SDValue Op0 = TheXor->getOperand(0);
13255     SDValue Op1 = TheXor->getOperand(1);
13256
13257     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13258       bool Equal = false;
13259       if (isOneConstant(Op0) && Op0.hasOneUse() &&
13260           Op0.getOpcode() == ISD::XOR) {
13261         TheXor = Op0.getNode();
13262         Equal = true;
13263       }
13264
13265       EVT SetCCVT = N.getValueType();
13266       if (LegalTypes)
13267         SetCCVT = getSetCCResultType(SetCCVT);
13268       // Replace the uses of XOR with SETCC
13269       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13270                           Equal ? ISD::SETEQ : ISD::SETNE);
13271     }
13272   }
13273
13274   return SDValue();
13275 }
13276
13277 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13278 //
13279 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13280   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13281   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13282
13283   // If N is a constant we could fold this into a fallthrough or unconditional
13284   // branch. However that doesn't happen very often in normal code, because
13285   // Instcombine/SimplifyCFG should have handled the available opportunities.
13286   // If we did this folding here, it would be necessary to update the
13287   // MachineBasicBlock CFG, which is awkward.
13288
13289   // Use SimplifySetCC to simplify SETCC's.
13290   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13291                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13292                                false);
13293   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13294
13295   // fold to a simpler setcc
13296   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13297     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13298                        N->getOperand(0), Simp.getOperand(2),
13299                        Simp.getOperand(0), Simp.getOperand(1),
13300                        N->getOperand(4));
13301
13302   return SDValue();
13303 }
13304
13305 /// Return true if 'Use' is a load or a store that uses N as its base pointer
13306 /// and that N may be folded in the load / store addressing mode.
13307 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13308                                     SelectionDAG &DAG,
13309                                     const TargetLowering &TLI) {
13310   EVT VT;
13311   unsigned AS;
13312
13313   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
13314     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13315       return false;
13316     VT = LD->getMemoryVT();
13317     AS = LD->getAddressSpace();
13318   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
13319     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13320       return false;
13321     VT = ST->getMemoryVT();
13322     AS = ST->getAddressSpace();
13323   } else
13324     return false;
13325
13326   TargetLowering::AddrMode AM;
13327   if (N->getOpcode() == ISD::ADD) {
13328     AM.HasBaseReg = true;
13329     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13330     if (Offset)
13331       // [reg +/- imm]
13332       AM.BaseOffs = Offset->getSExtValue();
13333     else
13334       // [reg +/- reg]
13335       AM.Scale = 1;
13336   } else if (N->getOpcode() == ISD::SUB) {
13337     AM.HasBaseReg = true;
13338     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13339     if (Offset)
13340       // [reg +/- imm]
13341       AM.BaseOffs = -Offset->getSExtValue();
13342     else
13343       // [reg +/- reg]
13344       AM.Scale = 1;
13345   } else
13346     return false;
13347
13348   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13349                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13350 }
13351
13352 /// Try turning a load/store into a pre-indexed load/store when the base
13353 /// pointer is an add or subtract and it has other uses besides the load/store.
13354 /// After the transformation, the new indexed load/store has effectively folded
13355 /// the add/subtract in and all of its other uses are redirected to the
13356 /// new load/store.
13357 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13358   if (Level < AfterLegalizeDAG)
13359     return false;
13360
13361   bool isLoad = true;
13362   SDValue Ptr;
13363   EVT VT;
13364   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13365     if (LD->isIndexed())
13366       return false;
13367     VT = LD->getMemoryVT();
13368     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13369         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13370       return false;
13371     Ptr = LD->getBasePtr();
13372   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13373     if (ST->isIndexed())
13374       return false;
13375     VT = ST->getMemoryVT();
13376     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13377         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13378       return false;
13379     Ptr = ST->getBasePtr();
13380     isLoad = false;
13381   } else {
13382     return false;
13383   }
13384
13385   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13386   // out.  There is no reason to make this a preinc/predec.
13387   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13388       Ptr.getNode()->hasOneUse())
13389     return false;
13390
13391   // Ask the target to do addressing mode selection.
13392   SDValue BasePtr;
13393   SDValue Offset;
13394   ISD::MemIndexedMode AM = ISD::UNINDEXED;
13395   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13396     return false;
13397
13398   // Backends without true r+i pre-indexed forms may need to pass a
13399   // constant base with a variable offset so that constant coercion
13400   // will work with the patterns in canonical form.
13401   bool Swapped = false;
13402   if (isa<ConstantSDNode>(BasePtr)) {
13403     std::swap(BasePtr, Offset);
13404     Swapped = true;
13405   }
13406
13407   // Don't create a indexed load / store with zero offset.
13408   if (isNullConstant(Offset))
13409     return false;
13410
13411   // Try turning it into a pre-indexed load / store except when:
13412   // 1) The new base ptr is a frame index.
13413   // 2) If N is a store and the new base ptr is either the same as or is a
13414   //    predecessor of the value being stored.
13415   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13416   //    that would create a cycle.
13417   // 4) All uses are load / store ops that use it as old base ptr.
13418
13419   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
13420   // (plus the implicit offset) to a register to preinc anyway.
13421   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13422     return false;
13423
13424   // Check #2.
13425   if (!isLoad) {
13426     SDValue Val = cast<StoreSDNode>(N)->getValue();
13427
13428     // Would require a copy.
13429     if (Val == BasePtr)
13430       return false;
13431
13432     // Would create a cycle.
13433     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13434       return false;
13435   }
13436
13437   // Caches for hasPredecessorHelper.
13438   SmallPtrSet<const SDNode *, 32> Visited;
13439   SmallVector<const SDNode *, 16> Worklist;
13440   Worklist.push_back(N);
13441
13442   // If the offset is a constant, there may be other adds of constants that
13443   // can be folded with this one. We should do this to avoid having to keep
13444   // a copy of the original base pointer.
13445   SmallVector<SDNode *, 16> OtherUses;
13446   if (isa<ConstantSDNode>(Offset))
13447     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13448                               UE = BasePtr.getNode()->use_end();
13449          UI != UE; ++UI) {
13450       SDUse &Use = UI.getUse();
13451       // Skip the use that is Ptr and uses of other results from BasePtr's
13452       // node (important for nodes that return multiple results).
13453       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13454         continue;
13455
13456       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13457         continue;
13458
13459       if (Use.getUser()->getOpcode() != ISD::ADD &&
13460           Use.getUser()->getOpcode() != ISD::SUB) {
13461         OtherUses.clear();
13462         break;
13463       }
13464
13465       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13466       if (!isa<ConstantSDNode>(Op1)) {
13467         OtherUses.clear();
13468         break;
13469       }
13470
13471       // FIXME: In some cases, we can be smarter about this.
13472       if (Op1.getValueType() != Offset.getValueType()) {
13473         OtherUses.clear();
13474         break;
13475       }
13476
13477       OtherUses.push_back(Use.getUser());
13478     }
13479
13480   if (Swapped)
13481     std::swap(BasePtr, Offset);
13482
13483   // Now check for #3 and #4.
13484   bool RealUse = false;
13485
13486   for (SDNode *Use : Ptr.getNode()->uses()) {
13487     if (Use == N)
13488       continue;
13489     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13490       return false;
13491
13492     // If Ptr may be folded in addressing mode of other use, then it's
13493     // not profitable to do this transformation.
13494     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13495       RealUse = true;
13496   }
13497
13498   if (!RealUse)
13499     return false;
13500
13501   SDValue Result;
13502   if (isLoad)
13503     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13504                                 BasePtr, Offset, AM);
13505   else
13506     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13507                                  BasePtr, Offset, AM);
13508   ++PreIndexedNodes;
13509   ++NodesCombined;
13510   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13511              Result.getNode()->dump(&DAG); dbgs() << '\n');
13512   WorklistRemover DeadNodes(*this);
13513   if (isLoad) {
13514     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13515     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13516   } else {
13517     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13518   }
13519
13520   // Finally, since the node is now dead, remove it from the graph.
13521   deleteAndRecombine(N);
13522
13523   if (Swapped)
13524     std::swap(BasePtr, Offset);
13525
13526   // Replace other uses of BasePtr that can be updated to use Ptr
13527   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
13528     unsigned OffsetIdx = 1;
13529     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13530       OffsetIdx = 0;
13531     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13532            BasePtr.getNode() && "Expected BasePtr operand");
13533
13534     // We need to replace ptr0 in the following expression:
13535     //   x0 * offset0 + y0 * ptr0 = t0
13536     // knowing that
13537     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13538     //
13539     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13540     // indexed load/store and the expression that needs to be re-written.
13541     //
13542     // Therefore, we have:
13543     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
13544
13545     ConstantSDNode *CN =
13546       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13547     int X0, X1, Y0, Y1;
13548     const APInt &Offset0 = CN->getAPIntValue();
13549     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13550
13551     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13552     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13553     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13554     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13555
13556     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13557
13558     APInt CNV = Offset0;
13559     if (X0 < 0) CNV = -CNV;
13560     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13561     else CNV = CNV - Offset1;
13562
13563     SDLoc DL(OtherUses[i]);
13564
13565     // We can now generate the new expression.
13566     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13567     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13568
13569     SDValue NewUse = DAG.getNode(Opcode,
13570                                  DL,
13571                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13572     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13573     deleteAndRecombine(OtherUses[i]);
13574   }
13575
13576   // Replace the uses of Ptr with uses of the updated base value.
13577   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13578   deleteAndRecombine(Ptr.getNode());
13579   AddToWorklist(Result.getNode());
13580
13581   return true;
13582 }
13583
13584 /// Try to combine a load/store with a add/sub of the base pointer node into a
13585 /// post-indexed load/store. The transformation folded the add/subtract into the
13586 /// new indexed load/store effectively and all of its uses are redirected to the
13587 /// new load/store.
13588 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13589   if (Level < AfterLegalizeDAG)
13590     return false;
13591
13592   bool isLoad = true;
13593   SDValue Ptr;
13594   EVT VT;
13595   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13596     if (LD->isIndexed())
13597       return false;
13598     VT = LD->getMemoryVT();
13599     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13600         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13601       return false;
13602     Ptr = LD->getBasePtr();
13603   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13604     if (ST->isIndexed())
13605       return false;
13606     VT = ST->getMemoryVT();
13607     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13608         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13609       return false;
13610     Ptr = ST->getBasePtr();
13611     isLoad = false;
13612   } else {
13613     return false;
13614   }
13615
13616   if (Ptr.getNode()->hasOneUse())
13617     return false;
13618
13619   for (SDNode *Op : Ptr.getNode()->uses()) {
13620     if (Op == N ||
13621         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13622       continue;
13623
13624     SDValue BasePtr;
13625     SDValue Offset;
13626     ISD::MemIndexedMode AM = ISD::UNINDEXED;
13627     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13628       // Don't create a indexed load / store with zero offset.
13629       if (isNullConstant(Offset))
13630         continue;
13631
13632       // Try turning it into a post-indexed load / store except when
13633       // 1) All uses are load / store ops that use it as base ptr (and
13634       //    it may be folded as addressing mmode).
13635       // 2) Op must be independent of N, i.e. Op is neither a predecessor
13636       //    nor a successor of N. Otherwise, if Op is folded that would
13637       //    create a cycle.
13638
13639       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13640         continue;
13641
13642       // Check for #1.
13643       bool TryNext = false;
13644       for (SDNode *Use : BasePtr.getNode()->uses()) {
13645         if (Use == Ptr.getNode())
13646           continue;
13647
13648         // If all the uses are load / store addresses, then don't do the
13649         // transformation.
13650         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13651           bool RealUse = false;
13652           for (SDNode *UseUse : Use->uses()) {
13653             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13654               RealUse = true;
13655           }
13656
13657           if (!RealUse) {
13658             TryNext = true;
13659             break;
13660           }
13661         }
13662       }
13663
13664       if (TryNext)
13665         continue;
13666
13667       // Check for #2.
13668       SmallPtrSet<const SDNode *, 32> Visited;
13669       SmallVector<const SDNode *, 8> Worklist;
13670       // Ptr is predecessor to both N and Op.
13671       Visited.insert(Ptr.getNode());
13672       Worklist.push_back(N);
13673       Worklist.push_back(Op);
13674       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13675           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13676         SDValue Result = isLoad
13677           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13678                                BasePtr, Offset, AM)
13679           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13680                                 BasePtr, Offset, AM);
13681         ++PostIndexedNodes;
13682         ++NodesCombined;
13683         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13684                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13685                    dbgs() << '\n');
13686         WorklistRemover DeadNodes(*this);
13687         if (isLoad) {
13688           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13689           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13690         } else {
13691           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13692         }
13693
13694         // Finally, since the node is now dead, remove it from the graph.
13695         deleteAndRecombine(N);
13696
13697         // Replace the uses of Use with uses of the updated base value.
13698         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13699                                       Result.getValue(isLoad ? 1 : 0));
13700         deleteAndRecombine(Op);
13701         return true;
13702       }
13703     }
13704   }
13705
13706   return false;
13707 }
13708
13709 /// Return the base-pointer arithmetic from an indexed \p LD.
13710 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13711   ISD::MemIndexedMode AM = LD->getAddressingMode();
13712   assert(AM != ISD::UNINDEXED);
13713   SDValue BP = LD->getOperand(1);
13714   SDValue Inc = LD->getOperand(2);
13715
13716   // Some backends use TargetConstants for load offsets, but don't expect
13717   // TargetConstants in general ADD nodes. We can convert these constants into
13718   // regular Constants (if the constant is not opaque).
13719   assert((Inc.getOpcode() != ISD::TargetConstant ||
13720           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13721          "Cannot split out indexing using opaque target constants");
13722   if (Inc.getOpcode() == ISD::TargetConstant) {
13723     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13724     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13725                           ConstInc->getValueType(0));
13726   }
13727
13728   unsigned Opc =
13729       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13730   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13731 }
13732
13733 static inline int numVectorEltsOrZero(EVT T) {
13734   return T.isVector() ? T.getVectorNumElements() : 0;
13735 }
13736
13737 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13738   Val = ST->getValue();
13739   EVT STType = Val.getValueType();
13740   EVT STMemType = ST->getMemoryVT();
13741   if (STType == STMemType)
13742     return true;
13743   if (isTypeLegal(STMemType))
13744     return false; // fail.
13745   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13746       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13747     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13748     return true;
13749   }
13750   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13751       STType.isInteger() && STMemType.isInteger()) {
13752     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13753     return true;
13754   }
13755   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13756     Val = DAG.getBitcast(STMemType, Val);
13757     return true;
13758   }
13759   return false; // fail.
13760 }
13761
13762 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13763   EVT LDMemType = LD->getMemoryVT();
13764   EVT LDType = LD->getValueType(0);
13765   assert(Val.getValueType() == LDMemType &&
13766          "Attempting to extend value of non-matching type");
13767   if (LDType == LDMemType)
13768     return true;
13769   if (LDMemType.isInteger() && LDType.isInteger()) {
13770     switch (LD->getExtensionType()) {
13771     case ISD::NON_EXTLOAD:
13772       Val = DAG.getBitcast(LDType, Val);
13773       return true;
13774     case ISD::EXTLOAD:
13775       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13776       return true;
13777     case ISD::SEXTLOAD:
13778       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13779       return true;
13780     case ISD::ZEXTLOAD:
13781       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13782       return true;
13783     }
13784   }
13785   return false;
13786 }
13787
13788 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13789   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
13790     return SDValue();
13791   SDValue Chain = LD->getOperand(0);
13792   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13793   // TODO: Relax this restriction for unordered atomics (see D66309)
13794   if (!ST || !ST->isSimple())
13795     return SDValue();
13796
13797   EVT LDType = LD->getValueType(0);
13798   EVT LDMemType = LD->getMemoryVT();
13799   EVT STMemType = ST->getMemoryVT();
13800   EVT STType = ST->getValue().getValueType();
13801
13802   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13803   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13804   int64_t Offset;
13805   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13806     return SDValue();
13807
13808   // Normalize for Endianness. After this Offset=0 will denote that the least
13809   // significant bit in the loaded value maps to the least significant bit in
13810   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
13811   // n:th least significant byte of the stored value.
13812   if (DAG.getDataLayout().isBigEndian())
13813     Offset = (STMemType.getStoreSizeInBits() -
13814               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13815
13816   // Check that the stored value cover all bits that are loaded.
13817   bool STCoversLD =
13818       (Offset >= 0) &&
13819       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13820
13821   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13822     if (LD->isIndexed()) {
13823       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13824                     LD->getAddressingMode() == ISD::POST_DEC);
13825       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13826       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13827                              LD->getOperand(1), LD->getOperand(2));
13828       SDValue Ops[] = {Val, Idx, Chain};
13829       return CombineTo(LD, Ops, 3);
13830     }
13831     return CombineTo(LD, Val, Chain);
13832   };
13833
13834   if (!STCoversLD)
13835     return SDValue();
13836
13837   // Memory as copy space (potentially masked).
13838   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13839     // Simple case: Direct non-truncating forwarding
13840     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13841       return ReplaceLd(LD, ST->getValue(), Chain);
13842     // Can we model the truncate and extension with an and mask?
13843     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13844         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13845       // Mask to size of LDMemType
13846       auto Mask =
13847           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13848                                                STMemType.getSizeInBits()),
13849                           SDLoc(ST), STType);
13850       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13851       return ReplaceLd(LD, Val, Chain);
13852     }
13853   }
13854
13855   // TODO: Deal with nonzero offset.
13856   if (LD->getBasePtr().isUndef() || Offset != 0)
13857     return SDValue();
13858   // Model necessary truncations / extenstions.
13859   SDValue Val;
13860   // Truncate Value To Stored Memory Size.
13861   do {
13862     if (!getTruncatedStoreValue(ST, Val))
13863       continue;
13864     if (!isTypeLegal(LDMemType))
13865       continue;
13866     if (STMemType != LDMemType) {
13867       // TODO: Support vectors? This requires extract_subvector/bitcast.
13868       if (!STMemType.isVector() && !LDMemType.isVector() &&
13869           STMemType.isInteger() && LDMemType.isInteger())
13870         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13871       else
13872         continue;
13873     }
13874     if (!extendLoadedValueToExtension(LD, Val))
13875       continue;
13876     return ReplaceLd(LD, Val, Chain);
13877   } while (false);
13878
13879   // On failure, cleanup dead nodes we may have created.
13880   if (Val->use_empty())
13881     deleteAndRecombine(Val.getNode());
13882   return SDValue();
13883 }
13884
13885 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13886   LoadSDNode *LD  = cast<LoadSDNode>(N);
13887   SDValue Chain = LD->getChain();
13888   SDValue Ptr   = LD->getBasePtr();
13889
13890   // If load is not volatile and there are no uses of the loaded value (and
13891   // the updated indexed value in case of indexed loads), change uses of the
13892   // chain value into uses of the chain input (i.e. delete the dead load).
13893   // TODO: Allow this for unordered atomics (see D66309)
13894   if (LD->isSimple()) {
13895     if (N->getValueType(1) == MVT::Other) {
13896       // Unindexed loads.
13897       if (!N->hasAnyUseOfValue(0)) {
13898         // It's not safe to use the two value CombineTo variant here. e.g.
13899         // v1, chain2 = load chain1, loc
13900         // v2, chain3 = load chain2, loc
13901         // v3         = add v2, c
13902         // Now we replace use of chain2 with chain1.  This makes the second load
13903         // isomorphic to the one we are deleting, and thus makes this load live.
13904         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13905                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13906                    dbgs() << "\n");
13907         WorklistRemover DeadNodes(*this);
13908         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13909         AddUsersToWorklist(Chain.getNode());
13910         if (N->use_empty())
13911           deleteAndRecombine(N);
13912
13913         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13914       }
13915     } else {
13916       // Indexed loads.
13917       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13918
13919       // If this load has an opaque TargetConstant offset, then we cannot split
13920       // the indexing into an add/sub directly (that TargetConstant may not be
13921       // valid for a different type of node, and we cannot convert an opaque
13922       // target constant into a regular constant).
13923       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13924                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13925
13926       if (!N->hasAnyUseOfValue(0) &&
13927           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13928         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13929         SDValue Index;
13930         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13931           Index = SplitIndexingFromLoad(LD);
13932           // Try to fold the base pointer arithmetic into subsequent loads and
13933           // stores.
13934           AddUsersToWorklist(N);
13935         } else
13936           Index = DAG.getUNDEF(N->getValueType(1));
13937         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13938                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13939                    dbgs() << " and 2 other values\n");
13940         WorklistRemover DeadNodes(*this);
13941         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13942         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13943         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13944         deleteAndRecombine(N);
13945         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13946       }
13947     }
13948   }
13949
13950   // If this load is directly stored, replace the load value with the stored
13951   // value.
13952   if (auto V = ForwardStoreValueToDirectLoad(LD))
13953     return V;
13954
13955   // Try to infer better alignment information than the load already has.
13956   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13957     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13958       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13959         SDValue NewLoad = DAG.getExtLoad(
13960             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13961             LD->getPointerInfo(), LD->getMemoryVT(), Align,
13962             LD->getMemOperand()->getFlags(), LD->getAAInfo());
13963         // NewLoad will always be N as we are only refining the alignment
13964         assert(NewLoad.getNode() == N);
13965         (void)NewLoad;
13966       }
13967     }
13968   }
13969
13970   if (LD->isUnindexed()) {
13971     // Walk up chain skipping non-aliasing memory nodes.
13972     SDValue BetterChain = FindBetterChain(LD, Chain);
13973
13974     // If there is a better chain.
13975     if (Chain != BetterChain) {
13976       SDValue ReplLoad;
13977
13978       // Replace the chain to void dependency.
13979       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13980         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13981                                BetterChain, Ptr, LD->getMemOperand());
13982       } else {
13983         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13984                                   LD->getValueType(0),
13985                                   BetterChain, Ptr, LD->getMemoryVT(),
13986                                   LD->getMemOperand());
13987       }
13988
13989       // Create token factor to keep old chain connected.
13990       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13991                                   MVT::Other, Chain, ReplLoad.getValue(1));
13992
13993       // Replace uses with load result and token factor
13994       return CombineTo(N, ReplLoad.getValue(0), Token);
13995     }
13996   }
13997
13998   // Try transforming N to an indexed load.
13999   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14000     return SDValue(N, 0);
14001
14002   // Try to slice up N to more direct loads if the slices are mapped to
14003   // different register banks or pairing can take place.
14004   if (SliceUpLoad(N))
14005     return SDValue(N, 0);
14006
14007   return SDValue();
14008 }
14009
14010 namespace {
14011
14012 /// Helper structure used to slice a load in smaller loads.
14013 /// Basically a slice is obtained from the following sequence:
14014 /// Origin = load Ty1, Base
14015 /// Shift = srl Ty1 Origin, CstTy Amount
14016 /// Inst = trunc Shift to Ty2
14017 ///
14018 /// Then, it will be rewritten into:
14019 /// Slice = load SliceTy, Base + SliceOffset
14020 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14021 ///
14022 /// SliceTy is deduced from the number of bits that are actually used to
14023 /// build Inst.
14024 struct LoadedSlice {
14025   /// Helper structure used to compute the cost of a slice.
14026   struct Cost {
14027     /// Are we optimizing for code size.
14028     bool ForCodeSize = false;
14029
14030     /// Various cost.
14031     unsigned Loads = 0;
14032     unsigned Truncates = 0;
14033     unsigned CrossRegisterBanksCopies = 0;
14034     unsigned ZExts = 0;
14035     unsigned Shift = 0;
14036
14037     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14038
14039     /// Get the cost of one isolated slice.
14040     Cost(const LoadedSlice &LS, bool ForCodeSize)
14041         : ForCodeSize(ForCodeSize), Loads(1) {
14042       EVT TruncType = LS.Inst->getValueType(0);
14043       EVT LoadedType = LS.getLoadedType();
14044       if (TruncType != LoadedType &&
14045           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14046         ZExts = 1;
14047     }
14048
14049     /// Account for slicing gain in the current cost.
14050     /// Slicing provide a few gains like removing a shift or a
14051     /// truncate. This method allows to grow the cost of the original
14052     /// load with the gain from this slice.
14053     void addSliceGain(const LoadedSlice &LS) {
14054       // Each slice saves a truncate.
14055       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14056       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14057                               LS.Inst->getValueType(0)))
14058         ++Truncates;
14059       // If there is a shift amount, this slice gets rid of it.
14060       if (LS.Shift)
14061         ++Shift;
14062       // If this slice can merge a cross register bank copy, account for it.
14063       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14064         ++CrossRegisterBanksCopies;
14065     }
14066
14067     Cost &operator+=(const Cost &RHS) {
14068       Loads += RHS.Loads;
14069       Truncates += RHS.Truncates;
14070       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14071       ZExts += RHS.ZExts;
14072       Shift += RHS.Shift;
14073       return *this;
14074     }
14075
14076     bool operator==(const Cost &RHS) const {
14077       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14078              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14079              ZExts == RHS.ZExts && Shift == RHS.Shift;
14080     }
14081
14082     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14083
14084     bool operator<(const Cost &RHS) const {
14085       // Assume cross register banks copies are as expensive as loads.
14086       // FIXME: Do we want some more target hooks?
14087       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14088       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14089       // Unless we are optimizing for code size, consider the
14090       // expensive operation first.
14091       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14092         return ExpensiveOpsLHS < ExpensiveOpsRHS;
14093       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14094              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14095     }
14096
14097     bool operator>(const Cost &RHS) const { return RHS < *this; }
14098
14099     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14100
14101     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14102   };
14103
14104   // The last instruction that represent the slice. This should be a
14105   // truncate instruction.
14106   SDNode *Inst;
14107
14108   // The original load instruction.
14109   LoadSDNode *Origin;
14110
14111   // The right shift amount in bits from the original load.
14112   unsigned Shift;
14113
14114   // The DAG from which Origin came from.
14115   // This is used to get some contextual information about legal types, etc.
14116   SelectionDAG *DAG;
14117
14118   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14119               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14120       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14121
14122   /// Get the bits used in a chunk of bits \p BitWidth large.
14123   /// \return Result is \p BitWidth and has used bits set to 1 and
14124   ///         not used bits set to 0.
14125   APInt getUsedBits() const {
14126     // Reproduce the trunc(lshr) sequence:
14127     // - Start from the truncated value.
14128     // - Zero extend to the desired bit width.
14129     // - Shift left.
14130     assert(Origin && "No original load to compare against.");
14131     unsigned BitWidth = Origin->getValueSizeInBits(0);
14132     assert(Inst && "This slice is not bound to an instruction");
14133     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14134            "Extracted slice is bigger than the whole type!");
14135     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14136     UsedBits.setAllBits();
14137     UsedBits = UsedBits.zext(BitWidth);
14138     UsedBits <<= Shift;
14139     return UsedBits;
14140   }
14141
14142   /// Get the size of the slice to be loaded in bytes.
14143   unsigned getLoadedSize() const {
14144     unsigned SliceSize = getUsedBits().countPopulation();
14145     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14146     return SliceSize / 8;
14147   }
14148
14149   /// Get the type that will be loaded for this slice.
14150   /// Note: This may not be the final type for the slice.
14151   EVT getLoadedType() const {
14152     assert(DAG && "Missing context");
14153     LLVMContext &Ctxt = *DAG->getContext();
14154     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14155   }
14156
14157   /// Get the alignment of the load used for this slice.
14158   unsigned getAlignment() const {
14159     unsigned Alignment = Origin->getAlignment();
14160     uint64_t Offset = getOffsetFromBase();
14161     if (Offset != 0)
14162       Alignment = MinAlign(Alignment, Alignment + Offset);
14163     return Alignment;
14164   }
14165
14166   /// Check if this slice can be rewritten with legal operations.
14167   bool isLegal() const {
14168     // An invalid slice is not legal.
14169     if (!Origin || !Inst || !DAG)
14170       return false;
14171
14172     // Offsets are for indexed load only, we do not handle that.
14173     if (!Origin->getOffset().isUndef())
14174       return false;
14175
14176     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14177
14178     // Check that the type is legal.
14179     EVT SliceType = getLoadedType();
14180     if (!TLI.isTypeLegal(SliceType))
14181       return false;
14182
14183     // Check that the load is legal for this type.
14184     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14185       return false;
14186
14187     // Check that the offset can be computed.
14188     // 1. Check its type.
14189     EVT PtrType = Origin->getBasePtr().getValueType();
14190     if (PtrType == MVT::Untyped || PtrType.isExtended())
14191       return false;
14192
14193     // 2. Check that it fits in the immediate.
14194     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14195       return false;
14196
14197     // 3. Check that the computation is legal.
14198     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14199       return false;
14200
14201     // Check that the zext is legal if it needs one.
14202     EVT TruncateType = Inst->getValueType(0);
14203     if (TruncateType != SliceType &&
14204         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14205       return false;
14206
14207     return true;
14208   }
14209
14210   /// Get the offset in bytes of this slice in the original chunk of
14211   /// bits.
14212   /// \pre DAG != nullptr.
14213   uint64_t getOffsetFromBase() const {
14214     assert(DAG && "Missing context.");
14215     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14216     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14217     uint64_t Offset = Shift / 8;
14218     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14219     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14220            "The size of the original loaded type is not a multiple of a"
14221            " byte.");
14222     // If Offset is bigger than TySizeInBytes, it means we are loading all
14223     // zeros. This should have been optimized before in the process.
14224     assert(TySizeInBytes > Offset &&
14225            "Invalid shift amount for given loaded size");
14226     if (IsBigEndian)
14227       Offset = TySizeInBytes - Offset - getLoadedSize();
14228     return Offset;
14229   }
14230
14231   /// Generate the sequence of instructions to load the slice
14232   /// represented by this object and redirect the uses of this slice to
14233   /// this new sequence of instructions.
14234   /// \pre this->Inst && this->Origin are valid Instructions and this
14235   /// object passed the legal check: LoadedSlice::isLegal returned true.
14236   /// \return The last instruction of the sequence used to load the slice.
14237   SDValue loadSlice() const {
14238     assert(Inst && Origin && "Unable to replace a non-existing slice.");
14239     const SDValue &OldBaseAddr = Origin->getBasePtr();
14240     SDValue BaseAddr = OldBaseAddr;
14241     // Get the offset in that chunk of bytes w.r.t. the endianness.
14242     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14243     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14244     if (Offset) {
14245       // BaseAddr = BaseAddr + Offset.
14246       EVT ArithType = BaseAddr.getValueType();
14247       SDLoc DL(Origin);
14248       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14249                               DAG->getConstant(Offset, DL, ArithType));
14250     }
14251
14252     // Create the type of the loaded slice according to its size.
14253     EVT SliceType = getLoadedType();
14254
14255     // Create the load for the slice.
14256     SDValue LastInst =
14257         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14258                      Origin->getPointerInfo().getWithOffset(Offset),
14259                      getAlignment(), Origin->getMemOperand()->getFlags());
14260     // If the final type is not the same as the loaded type, this means that
14261     // we have to pad with zero. Create a zero extend for that.
14262     EVT FinalType = Inst->getValueType(0);
14263     if (SliceType != FinalType)
14264       LastInst =
14265           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14266     return LastInst;
14267   }
14268
14269   /// Check if this slice can be merged with an expensive cross register
14270   /// bank copy. E.g.,
14271   /// i = load i32
14272   /// f = bitcast i32 i to float
14273   bool canMergeExpensiveCrossRegisterBankCopy() const {
14274     if (!Inst || !Inst->hasOneUse())
14275       return false;
14276     SDNode *Use = *Inst->use_begin();
14277     if (Use->getOpcode() != ISD::BITCAST)
14278       return false;
14279     assert(DAG && "Missing context");
14280     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14281     EVT ResVT = Use->getValueType(0);
14282     const TargetRegisterClass *ResRC =
14283         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14284     const TargetRegisterClass *ArgRC =
14285         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14286                            Use->getOperand(0)->isDivergent());
14287     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14288       return false;
14289
14290     // At this point, we know that we perform a cross-register-bank copy.
14291     // Check if it is expensive.
14292     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14293     // Assume bitcasts are cheap, unless both register classes do not
14294     // explicitly share a common sub class.
14295     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14296       return false;
14297
14298     // Check if it will be merged with the load.
14299     // 1. Check the alignment constraint.
14300     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14301         ResVT.getTypeForEVT(*DAG->getContext()));
14302
14303     if (RequiredAlignment > getAlignment())
14304       return false;
14305
14306     // 2. Check that the load is a legal operation for that type.
14307     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14308       return false;
14309
14310     // 3. Check that we do not have a zext in the way.
14311     if (Inst->getValueType(0) != getLoadedType())
14312       return false;
14313
14314     return true;
14315   }
14316 };
14317
14318 } // end anonymous namespace
14319
14320 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14321 /// \p UsedBits looks like 0..0 1..1 0..0.
14322 static bool areUsedBitsDense(const APInt &UsedBits) {
14323   // If all the bits are one, this is dense!
14324   if (UsedBits.isAllOnesValue())
14325     return true;
14326
14327   // Get rid of the unused bits on the right.
14328   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14329   // Get rid of the unused bits on the left.
14330   if (NarrowedUsedBits.countLeadingZeros())
14331     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14332   // Check that the chunk of bits is completely used.
14333   return NarrowedUsedBits.isAllOnesValue();
14334 }
14335
14336 /// Check whether or not \p First and \p Second are next to each other
14337 /// in memory. This means that there is no hole between the bits loaded
14338 /// by \p First and the bits loaded by \p Second.
14339 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14340                                      const LoadedSlice &Second) {
14341   assert(First.Origin == Second.Origin && First.Origin &&
14342          "Unable to match different memory origins.");
14343   APInt UsedBits = First.getUsedBits();
14344   assert((UsedBits & Second.getUsedBits()) == 0 &&
14345          "Slices are not supposed to overlap.");
14346   UsedBits |= Second.getUsedBits();
14347   return areUsedBitsDense(UsedBits);
14348 }
14349
14350 /// Adjust the \p GlobalLSCost according to the target
14351 /// paring capabilities and the layout of the slices.
14352 /// \pre \p GlobalLSCost should account for at least as many loads as
14353 /// there is in the slices in \p LoadedSlices.
14354 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14355                                  LoadedSlice::Cost &GlobalLSCost) {
14356   unsigned NumberOfSlices = LoadedSlices.size();
14357   // If there is less than 2 elements, no pairing is possible.
14358   if (NumberOfSlices < 2)
14359     return;
14360
14361   // Sort the slices so that elements that are likely to be next to each
14362   // other in memory are next to each other in the list.
14363   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14364     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14365     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14366   });
14367   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14368   // First (resp. Second) is the first (resp. Second) potentially candidate
14369   // to be placed in a paired load.
14370   const LoadedSlice *First = nullptr;
14371   const LoadedSlice *Second = nullptr;
14372   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14373                 // Set the beginning of the pair.
14374                                                            First = Second) {
14375     Second = &LoadedSlices[CurrSlice];
14376
14377     // If First is NULL, it means we start a new pair.
14378     // Get to the next slice.
14379     if (!First)
14380       continue;
14381
14382     EVT LoadedType = First->getLoadedType();
14383
14384     // If the types of the slices are different, we cannot pair them.
14385     if (LoadedType != Second->getLoadedType())
14386       continue;
14387
14388     // Check if the target supplies paired loads for this type.
14389     unsigned RequiredAlignment = 0;
14390     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14391       // move to the next pair, this type is hopeless.
14392       Second = nullptr;
14393       continue;
14394     }
14395     // Check if we meet the alignment requirement.
14396     if (RequiredAlignment > First->getAlignment())
14397       continue;
14398
14399     // Check that both loads are next to each other in memory.
14400     if (!areSlicesNextToEachOther(*First, *Second))
14401       continue;
14402
14403     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14404     --GlobalLSCost.Loads;
14405     // Move to the next pair.
14406     Second = nullptr;
14407   }
14408 }
14409
14410 /// Check the profitability of all involved LoadedSlice.
14411 /// Currently, it is considered profitable if there is exactly two
14412 /// involved slices (1) which are (2) next to each other in memory, and
14413 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14414 ///
14415 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14416 /// the elements themselves.
14417 ///
14418 /// FIXME: When the cost model will be mature enough, we can relax
14419 /// constraints (1) and (2).
14420 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14421                                 const APInt &UsedBits, bool ForCodeSize) {
14422   unsigned NumberOfSlices = LoadedSlices.size();
14423   if (StressLoadSlicing)
14424     return NumberOfSlices > 1;
14425
14426   // Check (1).
14427   if (NumberOfSlices != 2)
14428     return false;
14429
14430   // Check (2).
14431   if (!areUsedBitsDense(UsedBits))
14432     return false;
14433
14434   // Check (3).
14435   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14436   // The original code has one big load.
14437   OrigCost.Loads = 1;
14438   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14439     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14440     // Accumulate the cost of all the slices.
14441     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14442     GlobalSlicingCost += SliceCost;
14443
14444     // Account as cost in the original configuration the gain obtained
14445     // with the current slices.
14446     OrigCost.addSliceGain(LS);
14447   }
14448
14449   // If the target supports paired load, adjust the cost accordingly.
14450   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14451   return OrigCost > GlobalSlicingCost;
14452 }
14453
14454 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14455 /// operations, split it in the various pieces being extracted.
14456 ///
14457 /// This sort of thing is introduced by SROA.
14458 /// This slicing takes care not to insert overlapping loads.
14459 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14460 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14461   if (Level < AfterLegalizeDAG)
14462     return false;
14463
14464   LoadSDNode *LD = cast<LoadSDNode>(N);
14465   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
14466       !LD->getValueType(0).isInteger())
14467     return false;
14468
14469   // Keep track of already used bits to detect overlapping values.
14470   // In that case, we will just abort the transformation.
14471   APInt UsedBits(LD->getValueSizeInBits(0), 0);
14472
14473   SmallVector<LoadedSlice, 4> LoadedSlices;
14474
14475   // Check if this load is used as several smaller chunks of bits.
14476   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14477   // of computation for each trunc.
14478   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14479        UI != UIEnd; ++UI) {
14480     // Skip the uses of the chain.
14481     if (UI.getUse().getResNo() != 0)
14482       continue;
14483
14484     SDNode *User = *UI;
14485     unsigned Shift = 0;
14486
14487     // Check if this is a trunc(lshr).
14488     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14489         isa<ConstantSDNode>(User->getOperand(1))) {
14490       Shift = User->getConstantOperandVal(1);
14491       User = *User->use_begin();
14492     }
14493
14494     // At this point, User is a Truncate, iff we encountered, trunc or
14495     // trunc(lshr).
14496     if (User->getOpcode() != ISD::TRUNCATE)
14497       return false;
14498
14499     // The width of the type must be a power of 2 and greater than 8-bits.
14500     // Otherwise the load cannot be represented in LLVM IR.
14501     // Moreover, if we shifted with a non-8-bits multiple, the slice
14502     // will be across several bytes. We do not support that.
14503     unsigned Width = User->getValueSizeInBits(0);
14504     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14505       return false;
14506
14507     // Build the slice for this chain of computations.
14508     LoadedSlice LS(User, LD, Shift, &DAG);
14509     APInt CurrentUsedBits = LS.getUsedBits();
14510
14511     // Check if this slice overlaps with another.
14512     if ((CurrentUsedBits & UsedBits) != 0)
14513       return false;
14514     // Update the bits used globally.
14515     UsedBits |= CurrentUsedBits;
14516
14517     // Check if the new slice would be legal.
14518     if (!LS.isLegal())
14519       return false;
14520
14521     // Record the slice.
14522     LoadedSlices.push_back(LS);
14523   }
14524
14525   // Abort slicing if it does not seem to be profitable.
14526   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14527     return false;
14528
14529   ++SlicedLoads;
14530
14531   // Rewrite each chain to use an independent load.
14532   // By construction, each chain can be represented by a unique load.
14533
14534   // Prepare the argument for the new token factor for all the slices.
14535   SmallVector<SDValue, 8> ArgChains;
14536   for (SmallVectorImpl<LoadedSlice>::const_iterator
14537            LSIt = LoadedSlices.begin(),
14538            LSItEnd = LoadedSlices.end();
14539        LSIt != LSItEnd; ++LSIt) {
14540     SDValue SliceInst = LSIt->loadSlice();
14541     CombineTo(LSIt->Inst, SliceInst, true);
14542     if (SliceInst.getOpcode() != ISD::LOAD)
14543       SliceInst = SliceInst.getOperand(0);
14544     assert(SliceInst->getOpcode() == ISD::LOAD &&
14545            "It takes more than a zext to get to the loaded slice!!");
14546     ArgChains.push_back(SliceInst.getValue(1));
14547   }
14548
14549   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14550                               ArgChains);
14551   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14552   AddToWorklist(Chain.getNode());
14553   return true;
14554 }
14555
14556 /// Check to see if V is (and load (ptr), imm), where the load is having
14557 /// specific bytes cleared out.  If so, return the byte size being masked out
14558 /// and the shift amount.
14559 static std::pair<unsigned, unsigned>
14560 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14561   std::pair<unsigned, unsigned> Result(0, 0);
14562
14563   // Check for the structure we're looking for.
14564   if (V->getOpcode() != ISD::AND ||
14565       !isa<ConstantSDNode>(V->getOperand(1)) ||
14566       !ISD::isNormalLoad(V->getOperand(0).getNode()))
14567     return Result;
14568
14569   // Check the chain and pointer.
14570   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14571   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
14572
14573   // This only handles simple types.
14574   if (V.getValueType() != MVT::i16 &&
14575       V.getValueType() != MVT::i32 &&
14576       V.getValueType() != MVT::i64)
14577     return Result;
14578
14579   // Check the constant mask.  Invert it so that the bits being masked out are
14580   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
14581   // follow the sign bit for uniformity.
14582   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14583   unsigned NotMaskLZ = countLeadingZeros(NotMask);
14584   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
14585   unsigned NotMaskTZ = countTrailingZeros(NotMask);
14586   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
14587   if (NotMaskLZ == 64) return Result;  // All zero mask.
14588
14589   // See if we have a continuous run of bits.  If so, we have 0*1+0*
14590   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14591     return Result;
14592
14593   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14594   if (V.getValueType() != MVT::i64 && NotMaskLZ)
14595     NotMaskLZ -= 64-V.getValueSizeInBits();
14596
14597   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14598   switch (MaskedBytes) {
14599   case 1:
14600   case 2:
14601   case 4: break;
14602   default: return Result; // All one mask, or 5-byte mask.
14603   }
14604
14605   // Verify that the first bit starts at a multiple of mask so that the access
14606   // is aligned the same as the access width.
14607   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14608
14609   // For narrowing to be valid, it must be the case that the load the
14610   // immediately preceding memory operation before the store.
14611   if (LD == Chain.getNode())
14612     ; // ok.
14613   else if (Chain->getOpcode() == ISD::TokenFactor &&
14614            SDValue(LD, 1).hasOneUse()) {
14615     // LD has only 1 chain use so they are no indirect dependencies.
14616     if (!LD->isOperandOf(Chain.getNode()))
14617       return Result;
14618   } else
14619     return Result; // Fail.
14620
14621   Result.first = MaskedBytes;
14622   Result.second = NotMaskTZ/8;
14623   return Result;
14624 }
14625
14626 /// Check to see if IVal is something that provides a value as specified by
14627 /// MaskInfo. If so, replace the specified store with a narrower store of
14628 /// truncated IVal.
14629 static SDValue
14630 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14631                                 SDValue IVal, StoreSDNode *St,
14632                                 DAGCombiner *DC) {
14633   unsigned NumBytes = MaskInfo.first;
14634   unsigned ByteShift = MaskInfo.second;
14635   SelectionDAG &DAG = DC->getDAG();
14636
14637   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14638   // that uses this.  If not, this is not a replacement.
14639   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14640                                   ByteShift*8, (ByteShift+NumBytes)*8);
14641   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
14642
14643   // Check that it is legal on the target to do this.  It is legal if the new
14644   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14645   // legalization (and the target doesn't explicitly think this is a bad idea).
14646   MVT VT = MVT::getIntegerVT(NumBytes * 8);
14647   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14648   if (!DC->isTypeLegal(VT))
14649     return SDValue();
14650   if (St->getMemOperand() &&
14651       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
14652                               *St->getMemOperand()))
14653     return SDValue();
14654
14655   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14656   // shifted by ByteShift and truncated down to NumBytes.
14657   if (ByteShift) {
14658     SDLoc DL(IVal);
14659     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14660                        DAG.getConstant(ByteShift*8, DL,
14661                                     DC->getShiftAmountTy(IVal.getValueType())));
14662   }
14663
14664   // Figure out the offset for the store and the alignment of the access.
14665   unsigned StOffset;
14666   unsigned NewAlign = St->getAlignment();
14667
14668   if (DAG.getDataLayout().isLittleEndian())
14669     StOffset = ByteShift;
14670   else
14671     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14672
14673   SDValue Ptr = St->getBasePtr();
14674   if (StOffset) {
14675     SDLoc DL(IVal);
14676     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14677                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14678     NewAlign = MinAlign(NewAlign, StOffset);
14679   }
14680
14681   // Truncate down to the new size.
14682   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14683
14684   ++OpsNarrowed;
14685   return DAG
14686       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14687                 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
14688 }
14689
14690 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14691 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14692 /// narrowing the load and store if it would end up being a win for performance
14693 /// or code size.
14694 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14695   StoreSDNode *ST  = cast<StoreSDNode>(N);
14696   if (!ST->isSimple())
14697     return SDValue();
14698
14699   SDValue Chain = ST->getChain();
14700   SDValue Value = ST->getValue();
14701   SDValue Ptr   = ST->getBasePtr();
14702   EVT VT = Value.getValueType();
14703
14704   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14705     return SDValue();
14706
14707   unsigned Opc = Value.getOpcode();
14708
14709   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14710   // is a byte mask indicating a consecutive number of bytes, check to see if
14711   // Y is known to provide just those bytes.  If so, we try to replace the
14712   // load + replace + store sequence with a single (narrower) store, which makes
14713   // the load dead.
14714   if (Opc == ISD::OR) {
14715     std::pair<unsigned, unsigned> MaskedLoad;
14716     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14717     if (MaskedLoad.first)
14718       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14719                                                   Value.getOperand(1), ST,this))
14720         return NewST;
14721
14722     // Or is commutative, so try swapping X and Y.
14723     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14724     if (MaskedLoad.first)
14725       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14726                                                   Value.getOperand(0), ST,this))
14727         return NewST;
14728   }
14729
14730   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14731       Value.getOperand(1).getOpcode() != ISD::Constant)
14732     return SDValue();
14733
14734   SDValue N0 = Value.getOperand(0);
14735   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14736       Chain == SDValue(N0.getNode(), 1)) {
14737     LoadSDNode *LD = cast<LoadSDNode>(N0);
14738     if (LD->getBasePtr() != Ptr ||
14739         LD->getPointerInfo().getAddrSpace() !=
14740         ST->getPointerInfo().getAddrSpace())
14741       return SDValue();
14742
14743     // Find the type to narrow it the load / op / store to.
14744     SDValue N1 = Value.getOperand(1);
14745     unsigned BitWidth = N1.getValueSizeInBits();
14746     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14747     if (Opc == ISD::AND)
14748       Imm ^= APInt::getAllOnesValue(BitWidth);
14749     if (Imm == 0 || Imm.isAllOnesValue())
14750       return SDValue();
14751     unsigned ShAmt = Imm.countTrailingZeros();
14752     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14753     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14754     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14755     // The narrowing should be profitable, the load/store operation should be
14756     // legal (or custom) and the store size should be equal to the NewVT width.
14757     while (NewBW < BitWidth &&
14758            (NewVT.getStoreSizeInBits() != NewBW ||
14759             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14760             !TLI.isNarrowingProfitable(VT, NewVT))) {
14761       NewBW = NextPowerOf2(NewBW);
14762       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14763     }
14764     if (NewBW >= BitWidth)
14765       return SDValue();
14766
14767     // If the lsb changed does not start at the type bitwidth boundary,
14768     // start at the previous one.
14769     if (ShAmt % NewBW)
14770       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14771     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14772                                    std::min(BitWidth, ShAmt + NewBW));
14773     if ((Imm & Mask) == Imm) {
14774       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14775       if (Opc == ISD::AND)
14776         NewImm ^= APInt::getAllOnesValue(NewBW);
14777       uint64_t PtrOff = ShAmt / 8;
14778       // For big endian targets, we need to adjust the offset to the pointer to
14779       // load the correct bytes.
14780       if (DAG.getDataLayout().isBigEndian())
14781         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14782
14783       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14784       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14785       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14786         return SDValue();
14787
14788       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14789                                    Ptr.getValueType(), Ptr,
14790                                    DAG.getConstant(PtrOff, SDLoc(LD),
14791                                                    Ptr.getValueType()));
14792       SDValue NewLD =
14793           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14794                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14795                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14796       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14797                                    DAG.getConstant(NewImm, SDLoc(Value),
14798                                                    NewVT));
14799       SDValue NewST =
14800           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14801                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14802
14803       AddToWorklist(NewPtr.getNode());
14804       AddToWorklist(NewLD.getNode());
14805       AddToWorklist(NewVal.getNode());
14806       WorklistRemover DeadNodes(*this);
14807       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14808       ++OpsNarrowed;
14809       return NewST;
14810     }
14811   }
14812
14813   return SDValue();
14814 }
14815
14816 /// For a given floating point load / store pair, if the load value isn't used
14817 /// by any other operations, then consider transforming the pair to integer
14818 /// load / store operations if the target deems the transformation profitable.
14819 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14820   StoreSDNode *ST  = cast<StoreSDNode>(N);
14821   SDValue Value = ST->getValue();
14822   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14823       Value.hasOneUse()) {
14824     LoadSDNode *LD = cast<LoadSDNode>(Value);
14825     EVT VT = LD->getMemoryVT();
14826     if (!VT.isFloatingPoint() ||
14827         VT != ST->getMemoryVT() ||
14828         LD->isNonTemporal() ||
14829         ST->isNonTemporal() ||
14830         LD->getPointerInfo().getAddrSpace() != 0 ||
14831         ST->getPointerInfo().getAddrSpace() != 0)
14832       return SDValue();
14833
14834     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14835     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14836         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14837         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14838         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14839       return SDValue();
14840
14841     unsigned LDAlign = LD->getAlignment();
14842     unsigned STAlign = ST->getAlignment();
14843     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14844     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14845     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14846       return SDValue();
14847
14848     SDValue NewLD =
14849         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14850                     LD->getPointerInfo(), LDAlign);
14851
14852     SDValue NewST =
14853         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
14854                      ST->getPointerInfo(), STAlign);
14855
14856     AddToWorklist(NewLD.getNode());
14857     AddToWorklist(NewST.getNode());
14858     WorklistRemover DeadNodes(*this);
14859     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14860     ++LdStFP2Int;
14861     return NewST;
14862   }
14863
14864   return SDValue();
14865 }
14866
14867 // This is a helper function for visitMUL to check the profitability
14868 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14869 // MulNode is the original multiply, AddNode is (add x, c1),
14870 // and ConstNode is c2.
14871 //
14872 // If the (add x, c1) has multiple uses, we could increase
14873 // the number of adds if we make this transformation.
14874 // It would only be worth doing this if we can remove a
14875 // multiply in the process. Check for that here.
14876 // To illustrate:
14877 //     (A + c1) * c3
14878 //     (A + c2) * c3
14879 // We're checking for cases where we have common "c3 * A" expressions.
14880 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14881                                               SDValue &AddNode,
14882                                               SDValue &ConstNode) {
14883   APInt Val;
14884
14885   // If the add only has one use, this would be OK to do.
14886   if (AddNode.getNode()->hasOneUse())
14887     return true;
14888
14889   // Walk all the users of the constant with which we're multiplying.
14890   for (SDNode *Use : ConstNode->uses()) {
14891     if (Use == MulNode) // This use is the one we're on right now. Skip it.
14892       continue;
14893
14894     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14895       SDNode *OtherOp;
14896       SDNode *MulVar = AddNode.getOperand(0).getNode();
14897
14898       // OtherOp is what we're multiplying against the constant.
14899       if (Use->getOperand(0) == ConstNode)
14900         OtherOp = Use->getOperand(1).getNode();
14901       else
14902         OtherOp = Use->getOperand(0).getNode();
14903
14904       // Check to see if multiply is with the same operand of our "add".
14905       //
14906       //     ConstNode  = CONST
14907       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
14908       //     ...
14909       //     AddNode  = (A + c1)  <-- MulVar is A.
14910       //         = AddNode * ConstNode   <-- current visiting instruction.
14911       //
14912       // If we make this transformation, we will have a common
14913       // multiply (ConstNode * A) that we can save.
14914       if (OtherOp == MulVar)
14915         return true;
14916
14917       // Now check to see if a future expansion will give us a common
14918       // multiply.
14919       //
14920       //     ConstNode  = CONST
14921       //     AddNode    = (A + c1)
14922       //     ...   = AddNode * ConstNode <-- current visiting instruction.
14923       //     ...
14924       //     OtherOp = (A + c2)
14925       //     Use     = OtherOp * ConstNode <-- visiting Use.
14926       //
14927       // If we make this transformation, we will have a common
14928       // multiply (CONST * A) after we also do the same transformation
14929       // to the "t2" instruction.
14930       if (OtherOp->getOpcode() == ISD::ADD &&
14931           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14932           OtherOp->getOperand(0).getNode() == MulVar)
14933         return true;
14934     }
14935   }
14936
14937   // Didn't find a case where this would be profitable.
14938   return false;
14939 }
14940
14941 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14942                                          unsigned NumStores) {
14943   SmallVector<SDValue, 8> Chains;
14944   SmallPtrSet<const SDNode *, 8> Visited;
14945   SDLoc StoreDL(StoreNodes[0].MemNode);
14946
14947   for (unsigned i = 0; i < NumStores; ++i) {
14948     Visited.insert(StoreNodes[i].MemNode);
14949   }
14950
14951   // don't include nodes that are children or repeated nodes.
14952   for (unsigned i = 0; i < NumStores; ++i) {
14953     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
14954       Chains.push_back(StoreNodes[i].MemNode->getChain());
14955   }
14956
14957   assert(Chains.size() > 0 && "Chain should have generated a chain");
14958   return DAG.getTokenFactor(StoreDL, Chains);
14959 }
14960
14961 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14962     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14963     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14964   // Make sure we have something to merge.
14965   if (NumStores < 2)
14966     return false;
14967
14968   // The latest Node in the DAG.
14969   SDLoc DL(StoreNodes[0].MemNode);
14970
14971   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14972   unsigned SizeInBits = NumStores * ElementSizeBits;
14973   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14974
14975   EVT StoreTy;
14976   if (UseVector) {
14977     unsigned Elts = NumStores * NumMemElts;
14978     // Get the type for the merged vector store.
14979     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14980   } else
14981     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14982
14983   SDValue StoredVal;
14984   if (UseVector) {
14985     if (IsConstantSrc) {
14986       SmallVector<SDValue, 8> BuildVector;
14987       for (unsigned I = 0; I != NumStores; ++I) {
14988         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14989         SDValue Val = St->getValue();
14990         // If constant is of the wrong type, convert it now.
14991         if (MemVT != Val.getValueType()) {
14992           Val = peekThroughBitcasts(Val);
14993           // Deal with constants of wrong size.
14994           if (ElementSizeBits != Val.getValueSizeInBits()) {
14995             EVT IntMemVT =
14996                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14997             if (isa<ConstantFPSDNode>(Val)) {
14998               // Not clear how to truncate FP values.
14999               return false;
15000             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15001               Val = DAG.getConstant(C->getAPIntValue()
15002                                         .zextOrTrunc(Val.getValueSizeInBits())
15003                                         .zextOrTrunc(ElementSizeBits),
15004                                     SDLoc(C), IntMemVT);
15005           }
15006           // Make sure correctly size type is the correct type.
15007           Val = DAG.getBitcast(MemVT, Val);
15008         }
15009         BuildVector.push_back(Val);
15010       }
15011       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15012                                                : ISD::BUILD_VECTOR,
15013                               DL, StoreTy, BuildVector);
15014     } else {
15015       SmallVector<SDValue, 8> Ops;
15016       for (unsigned i = 0; i < NumStores; ++i) {
15017         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15018         SDValue Val = peekThroughBitcasts(St->getValue());
15019         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15020         // type MemVT. If the underlying value is not the correct
15021         // type, but it is an extraction of an appropriate vector we
15022         // can recast Val to be of the correct type. This may require
15023         // converting between EXTRACT_VECTOR_ELT and
15024         // EXTRACT_SUBVECTOR.
15025         if ((MemVT != Val.getValueType()) &&
15026             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15027              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15028           EVT MemVTScalarTy = MemVT.getScalarType();
15029           // We may need to add a bitcast here to get types to line up.
15030           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15031             Val = DAG.getBitcast(MemVT, Val);
15032           } else {
15033             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15034                                             : ISD::EXTRACT_VECTOR_ELT;
15035             SDValue Vec = Val.getOperand(0);
15036             SDValue Idx = Val.getOperand(1);
15037             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15038           }
15039         }
15040         Ops.push_back(Val);
15041       }
15042
15043       // Build the extracted vector elements back into a vector.
15044       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15045                                                : ISD::BUILD_VECTOR,
15046                               DL, StoreTy, Ops);
15047     }
15048   } else {
15049     // We should always use a vector store when merging extracted vector
15050     // elements, so this path implies a store of constants.
15051     assert(IsConstantSrc && "Merged vector elements should use vector store");
15052
15053     APInt StoreInt(SizeInBits, 0);
15054
15055     // Construct a single integer constant which is made of the smaller
15056     // constant inputs.
15057     bool IsLE = DAG.getDataLayout().isLittleEndian();
15058     for (unsigned i = 0; i < NumStores; ++i) {
15059       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15060       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15061
15062       SDValue Val = St->getValue();
15063       Val = peekThroughBitcasts(Val);
15064       StoreInt <<= ElementSizeBits;
15065       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15066         StoreInt |= C->getAPIntValue()
15067                         .zextOrTrunc(ElementSizeBits)
15068                         .zextOrTrunc(SizeInBits);
15069       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15070         StoreInt |= C->getValueAPF()
15071                         .bitcastToAPInt()
15072                         .zextOrTrunc(ElementSizeBits)
15073                         .zextOrTrunc(SizeInBits);
15074         // If fp truncation is necessary give up for now.
15075         if (MemVT.getSizeInBits() != ElementSizeBits)
15076           return false;
15077       } else {
15078         llvm_unreachable("Invalid constant element type");
15079       }
15080     }
15081
15082     // Create the new Load and Store operations.
15083     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15084   }
15085
15086   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15087   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15088
15089   // make sure we use trunc store if it's necessary to be legal.
15090   SDValue NewStore;
15091   if (!UseTrunc) {
15092     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15093                             FirstInChain->getPointerInfo(),
15094                             FirstInChain->getAlignment());
15095   } else { // Must be realized as a trunc store
15096     EVT LegalizedStoredValTy =
15097         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15098     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15099     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15100     SDValue ExtendedStoreVal =
15101         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15102                         LegalizedStoredValTy);
15103     NewStore = DAG.getTruncStore(
15104         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15105         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15106         FirstInChain->getAlignment(),
15107         FirstInChain->getMemOperand()->getFlags());
15108   }
15109
15110   // Replace all merged stores with the new store.
15111   for (unsigned i = 0; i < NumStores; ++i)
15112     CombineTo(StoreNodes[i].MemNode, NewStore);
15113
15114   AddToWorklist(NewChain.getNode());
15115   return true;
15116 }
15117
15118 void DAGCombiner::getStoreMergeCandidates(
15119     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15120     SDNode *&RootNode) {
15121   // This holds the base pointer, index, and the offset in bytes from the base
15122   // pointer.
15123   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
15124   EVT MemVT = St->getMemoryVT();
15125
15126   SDValue Val = peekThroughBitcasts(St->getValue());
15127   // We must have a base and an offset.
15128   if (!BasePtr.getBase().getNode())
15129     return;
15130
15131   // Do not handle stores to undef base pointers.
15132   if (BasePtr.getBase().isUndef())
15133     return;
15134
15135   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15136   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15137                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15138   bool IsLoadSrc = isa<LoadSDNode>(Val);
15139   BaseIndexOffset LBasePtr;
15140   // Match on loadbaseptr if relevant.
15141   EVT LoadVT;
15142   if (IsLoadSrc) {
15143     auto *Ld = cast<LoadSDNode>(Val);
15144     LBasePtr = BaseIndexOffset::match(Ld, DAG);
15145     LoadVT = Ld->getMemoryVT();
15146     // Load and store should be the same type.
15147     if (MemVT != LoadVT)
15148       return;
15149     // Loads must only have one use.
15150     if (!Ld->hasNUsesOfValue(1, 0))
15151       return;
15152     // The memory operands must not be volatile/indexed/atomic.
15153     // TODO: May be able to relax for unordered atomics (see D66309)
15154     if (!Ld->isSimple() || Ld->isIndexed())
15155       return;
15156   }
15157   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15158                             int64_t &Offset) -> bool {
15159     // The memory operands must not be volatile/indexed/atomic.
15160     // TODO: May be able to relax for unordered atomics (see D66309)
15161     if (!Other->isSimple() ||  Other->isIndexed())
15162       return false;
15163     // Don't mix temporal stores with non-temporal stores.
15164     if (St->isNonTemporal() != Other->isNonTemporal())
15165       return false;
15166     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15167     // Allow merging constants of different types as integers.
15168     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15169                                            : Other->getMemoryVT() != MemVT;
15170     if (IsLoadSrc) {
15171       if (NoTypeMatch)
15172         return false;
15173       // The Load's Base Ptr must also match
15174       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15175         BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15176         if (LoadVT != OtherLd->getMemoryVT())
15177           return false;
15178         // Loads must only have one use.
15179         if (!OtherLd->hasNUsesOfValue(1, 0))
15180           return false;
15181         // The memory operands must not be volatile/indexed/atomic.
15182         // TODO: May be able to relax for unordered atomics (see D66309)
15183         if (!OtherLd->isSimple() ||
15184             OtherLd->isIndexed())
15185           return false;
15186         // Don't mix temporal loads with non-temporal loads.
15187         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15188           return false;
15189         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15190           return false;
15191       } else
15192         return false;
15193     }
15194     if (IsConstantSrc) {
15195       if (NoTypeMatch)
15196         return false;
15197       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15198         return false;
15199     }
15200     if (IsExtractVecSrc) {
15201       // Do not merge truncated stores here.
15202       if (Other->isTruncatingStore())
15203         return false;
15204       if (!MemVT.bitsEq(OtherBC.getValueType()))
15205         return false;
15206       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15207           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15208         return false;
15209     }
15210     Ptr = BaseIndexOffset::match(Other, DAG);
15211     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15212   };
15213
15214   // Check if the pair of StoreNode and the RootNode already bail out many
15215   // times which is over the limit in dependence check.
15216   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
15217                                         SDNode *RootNode) -> bool {
15218     auto RootCount = StoreRootCountMap.find(StoreNode);
15219     if (RootCount != StoreRootCountMap.end() &&
15220         RootCount->second.first == RootNode &&
15221         RootCount->second.second > StoreMergeDependenceLimit)
15222       return true;
15223     return false;
15224   };
15225
15226   // We are looking for a root node which is an ancestor to all mergeable
15227   // stores. We search up through a load, to our root and then down
15228   // through all children. For instance we will find Store{1,2,3} if
15229   // St is Store1, Store2, or Store3 where the root is not a load,
15230   // which is always true for nonvolatile ops. TODO: Expand
15231   // the search to find all valid candidates through multiple layers of loads.
15232   //
15233   // Root
15234   // |-------|-------|
15235   // Load    Load    Store3
15236   // |       |
15237   // Store1   Store2
15238   //
15239   // FIXME: We should be able to climb and
15240   // descend TokenFactors to find candidates as well.
15241
15242   RootNode = St->getChain().getNode();
15243
15244   unsigned NumNodesExplored = 0;
15245   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15246     RootNode = Ldn->getChain().getNode();
15247     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15248          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15249       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15250         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15251           if (I2.getOperandNo() == 0)
15252             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15253               BaseIndexOffset Ptr;
15254               int64_t PtrDiff;
15255               if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15256                   !OverLimitInDependenceCheck(OtherST, RootNode))
15257                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15258             }
15259   } else
15260     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15261          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15262       if (I.getOperandNo() == 0)
15263         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15264           BaseIndexOffset Ptr;
15265           int64_t PtrDiff;
15266           if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15267               !OverLimitInDependenceCheck(OtherST, RootNode))
15268             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15269         }
15270 }
15271
15272 // Returns true if the first NumStores entries of StoreNodes can be merged
15273 // without creating a cycle in the DAG. A candidate store may depend on
15274 // another candidate indirectly through one of its non-chain operands
15275 // (chain dependencies were already handled during candidate selection),
15276 // so walk upwards from those operands looking for any candidate store.
15277 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15278     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15279     SDNode *RootNode) {
15280   // FIXME: We should be able to truncate a full search of
15281   // predecessors by doing a BFS and keeping tabs on the originating
15282   // stores from which worklist nodes come, in a similar way to
15283   // TokenFactor simplification.
15284   SmallPtrSet<const SDNode *, 32> Visited;
15285   SmallVector<const SDNode *, 8> Worklist;
15286
15287   // RootNode precedes every candidate, so the search never has to go past
15288   // it. Pre-populate Visited with RootNode, peeking through TokenFactors.
15289   // These pruning nodes are excluded from the size limit below.
15290   Worklist.push_back(RootNode);
15291   do {
15292     const SDNode *Cur = Worklist.pop_back_val();
15293     const bool NewlyVisited = Visited.insert(Cur).second;
15294     if (NewlyVisited && Cur->getOpcode() == ISD::TokenFactor)
15295       for (SDValue ChainOp : Cur->ops())
15296         Worklist.push_back(ChainOp.getNode());
15297   } while (!Worklist.empty());
15298
15299   // Budget of 1024 nodes on top of the pruning nodes collected above.
15300   const unsigned Max = 1024 + Visited.size();
15301   // Seed the worklist with the non-chain operands of every candidate.
15302   // Of the 4 store operands:
15303   //   * Chain (Op 0)   -> Already considered in candidate selection, so it
15304   //                       can safely be ignored.
15305   //   * Value (Op 1)   -> Cycles may happen (e.g. through load chains).
15306   //   * Address (Op 2) -> Merged addresses may only vary by a fixed
15307   //                       constant, but aren't necessarily from the same
15308   //                       base node, so cycles are possible (e.g. via
15309   //                       indexed store).
15310   //   * (Op 3)         -> The pre- or post-indexing offset (or undef for
15311   //                       non-indexed stores). Not constant on all targets
15312   //                       (e.g. ARM), so it can participate in a cycle.
15313   for (unsigned StIdx = 0; StIdx != NumStores; ++StIdx) {
15314     SDNode *Store = StoreNodes[StIdx].MemNode;
15315     const unsigned NumOps = Store->getNumOperands();
15316     for (unsigned OpIdx = 1; OpIdx < NumOps; ++OpIdx)
15317       Worklist.push_back(Store->getOperand(OpIdx).getNode());
15318   }
15319
15320   // Search through the DAG. We can stop at the first candidate for which
15321   // the predecessor search reports a hit.
15322   for (unsigned StIdx = 0; StIdx != NumStores; ++StIdx) {
15323     SDNode *Store = StoreNodes[StIdx].MemNode;
15324     if (!SDNode::hasPredecessorHelper(Store, Visited, Worklist, Max))
15325       continue;
15326     // If the search bailed out on the size limit, record the (StoreNode,
15327     // RootNode) pair in StoreRootCountMap. Once a pair has been seen more
15328     // often than the limit, the store is no longer offered as a candidate.
15329     if (Visited.size() >= Max) {
15330       auto &RootCount = StoreRootCountMap[Store];
15331       if (RootCount.first != RootNode)
15332         RootCount = {RootNode, 1};
15333       else
15334         ++RootCount.second;
15335     }
15336     return false;
15337   }
15338   return true;
15339 }
15340
      // Try to merge St with other stores on the same chain root that write to
      // consecutive addresses, replacing each mergeable run with a single wider
      // store (plus a single wider load when the stored values are loads).
      // Only stored values that are constants, extracted vector elements, or
      // loads are considered. Returns RV, which becomes true once a merge has
      // been performed.
15341 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15342   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
15343     return false;
15344
15345   EVT MemVT = St->getMemoryVT();
15346   int64_t ElementSizeBytes = MemVT.getStoreSize();
15347   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15348
        // Bail out if a store twice as wide as MemVT would already exceed
        // MaximumLegalStoreInBits.
15349   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15350     return false;
15351
        // Vector types are never used for the merged access when the function
        // carries the noimplicitfloat attribute.
15352   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15353       Attribute::NoImplicitFloat);
15354
15355   // This function cannot currently deal with non-byte-sized memory sizes.
15356   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15357     return false;
15358
15359   if (!MemVT.isSimple())
15360     return false;
15361
15362   // Perform an early exit check. Do not bother looking at stored values that
15363   // are not constants, loads, or extracted vector elements.
15364   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15365   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15366   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15367                        isa<ConstantFPSDNode>(StoredVal);
15368   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15369                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15370   bool IsNonTemporalStore = St->isNonTemporal();
15371   bool IsNonTemporalLoad =
15372       IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15373
15374   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15375     return false;
15376
15377   SmallVector<MemOpLink, 8> StoreNodes;
15378   SDNode *RootNode;
15379   // Find potential store merge candidates by searching through chain sub-DAG
15380   getStoreMergeCandidates(St, StoreNodes, RootNode);
15381
15382   // Check if there is anything to merge.
15383   if (StoreNodes.size() < 2)
15384     return false;
15385
15386   // Sort the memory operands according to their distance from the
15387   // base pointer.
15388   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15389     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15390   });
15391
15392   // Store merging attempts to merge the lowest stores first. This
15393   // generally works out because, if successful, the remaining stores are
15394   // checked after the first collection of stores is merged. However, in
15395   // the case that a non-mergeable store is found first, e.g., {p[-2],
15396   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15397   // mergeable cases. To prevent this, we prune such stores from the
15398   // front of StoreNodes here.
15399
        // Each iteration of the outer loop handles the run of consecutive stores
        // at the front of StoreNodes; RV records whether anything was merged.
15400   bool RV = false;
15401   while (StoreNodes.size() > 1) {
          // Skip leading stores that are not immediately followed by a store to
          // the adjacent address.
15402     size_t StartIdx = 0;
15403     while ((StartIdx + 1 < StoreNodes.size()) &&
15404            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15405                StoreNodes[StartIdx + 1].OffsetFromBase)
15406       ++StartIdx;
15407
15408     // Bail if we don't have enough candidates to merge.
15409     if (StartIdx + 1 >= StoreNodes.size())
15410       return RV;
15411
15412     if (StartIdx)
15413       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15414
15415     // Scan the memory operations on the chain and find the first
15416     // non-consecutive store memory address.
15417     unsigned NumConsecutiveStores = 1;
15418     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15419     // Check that the addresses are consecutive starting from the second
15420     // element in the list of stores.
15421     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15422       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15423       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15424         break;
15425       NumConsecutiveStores = i + 1;
15426     }
15427
15428     if (NumConsecutiveStores < 2) {
15429       StoreNodes.erase(StoreNodes.begin(),
15430                        StoreNodes.begin() + NumConsecutiveStores);
15431       continue;
15432     }
15433
15434     // Context and data layout used by the legality queries below.
15435     LLVMContext &Context = *DAG.getContext();
15436     const DataLayout &DL = DAG.getDataLayout();
15437
15438     // Store the constants into memory as one consecutive store.
15439     if (IsConstantSrc) {
15440       while (NumConsecutiveStores >= 2) {
15441         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15442         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15443         unsigned FirstStoreAlign = FirstInChain->getAlignment();
              // Number of leading stores that can be merged into one legal
              // integer (LastLegalType) or vector (LastLegalVectorType) store.
15444         unsigned LastLegalType = 1;
15445         unsigned LastLegalVectorType = 1;
15446         bool LastIntegerTrunc = false;
15447         bool NonZero = false;
              // Index of the first zero element that follows a non-zero one;
              // stays at NumConsecutiveStores if there is none.
15448         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15449         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15450           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15451           SDValue StoredVal = ST->getValue();
15452           bool IsElementZero = false;
15453           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15454             IsElementZero = C->isNullValue();
15455           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15456             IsElementZero = C->getConstantFPValue()->isNullValue();
15457           if (IsElementZero) {
15458             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15459               FirstZeroAfterNonZero = i;
15460           }
15461           NonZero |= !IsElementZero;
15462
15463           // Find a legal type for the constant store.
15464           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15465           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15466           bool IsFast = false;
15467
15468           // Break early when size is too large to be legal.
15469           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15470             break;
15471
15472           if (TLI.isTypeLegal(StoreTy) &&
15473               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15474               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15475                                      *FirstInChain->getMemOperand(), &IsFast) &&
15476               IsFast) {
15477             LastIntegerTrunc = false;
15478             LastLegalType = i + 1;
15479             // Or check whether a truncstore is legal.
15480           } else if (TLI.getTypeAction(Context, StoreTy) ==
15481                      TargetLowering::TypePromoteInteger) {
15482             EVT LegalizedStoredValTy =
15483                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15484             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15485                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15486                 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15487                                        *FirstInChain->getMemOperand(),
15488                                        &IsFast) &&
15489                 IsFast) {
15490               LastIntegerTrunc = true;
15491               LastLegalType = i + 1;
15492             }
15493           }
15494
15495           // We only use vectors if the constant is known to be zero or the
15496           // target allows it and the function is not marked with the
15497           // noimplicitfloat attribute.
15498           if ((!NonZero ||
15499                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15500               !NoVectors) {
15501             // Find a legal type for the vector store.
15502             unsigned Elts = (i + 1) * NumMemElts;
15503             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15504             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15505                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15506                 TLI.allowsMemoryAccess(
15507                     Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15508                 IsFast)
15509               LastLegalVectorType = i + 1;
15510           }
15511         }
15512
              // Prefer the vector form only when it covers strictly more stores
              // than the integer form.
15513         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15514         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15515
15516         // Check if we found a legal integer type that creates a meaningful
15517         // merge.
15518         if (NumElem < 2) {
15519           // We know that candidate stores are in order and of correct
15520           // shape. While there is no mergeable sequence from the
15521           // beginning one may start later in the sequence. The only
15522           // reason a merge of size N could have failed where another of
15523           // the same size would not have, is if the alignment has
15524           // improved or we've dropped a non-zero value. Drop as many
15525           // candidates as we can here.
15526           unsigned NumSkip = 1;
15527           while (
15528               (NumSkip < NumConsecutiveStores) &&
15529               (NumSkip < FirstZeroAfterNonZero) &&
15530               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15531             NumSkip++;
15532
15533           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15534           NumConsecutiveStores -= NumSkip;
15535           continue;
15536         }
15537
15538         // Check that we can merge these candidates without causing a cycle.
15539         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15540                                                       RootNode)) {
15541           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15542           NumConsecutiveStores -= NumElem;
15543           continue;
15544         }
15545
15546         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15547                                               UseVector, LastIntegerTrunc);
15548
15549         // Remove merged stores for next iteration.
15550         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15551         NumConsecutiveStores -= NumElem;
15552       }
15553       continue;
15554     }
15555
15556     // When extracting multiple vector elements, try to store them
15557     // in one vector store rather than a sequence of scalar stores.
15558     if (IsExtractVecSrc) {
15559       // Loop on Consecutive Stores on success.
15560       while (NumConsecutiveStores >= 2) {
15561         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15562         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15563         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15564         unsigned NumStoresToMerge = 1;
15565         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15566           // Find a legal type for the vector store.
15567           unsigned Elts = (i + 1) * NumMemElts;
15568           EVT Ty =
15569               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15570           bool IsFast;
15571
15572           // Break early when size is too large to be legal.
15573           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15574             break;
15575
15576           if (TLI.isTypeLegal(Ty) &&
15577               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15578               TLI.allowsMemoryAccess(Context, DL, Ty,
15579                                      *FirstInChain->getMemOperand(), &IsFast) &&
15580               IsFast)
15581             NumStoresToMerge = i + 1;
15582         }
15583
15584         // Check if we found a legal vector type creating a meaningful
15585         // merge.
15586         if (NumStoresToMerge < 2) {
15587           // We know that candidate stores are in order and of correct
15588           // shape. While there is no mergeable sequence from the
15589           // beginning one may start later in the sequence. The only
15590           // reason a merge of size N could have failed where another of
15591           // the same size would not have, is if the alignment has
15592           // improved. Drop as many candidates as we can here.
15593           unsigned NumSkip = 1;
15594           while (
15595               (NumSkip < NumConsecutiveStores) &&
15596               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15597             NumSkip++;
15598
15599           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15600           NumConsecutiveStores -= NumSkip;
15601           continue;
15602         }
15603
15604         // Check that we can merge these candidates without causing a cycle.
15605         if (!checkMergeStoreCandidatesForDependencies(
15606                 StoreNodes, NumStoresToMerge, RootNode)) {
15607           StoreNodes.erase(StoreNodes.begin(),
15608                            StoreNodes.begin() + NumStoresToMerge);
15609           NumConsecutiveStores -= NumStoresToMerge;
15610           continue;
15611         }
15612
15613         RV |= MergeStoresOfConstantsOrVecElts(
15614             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15615
15616         StoreNodes.erase(StoreNodes.begin(),
15617                          StoreNodes.begin() + NumStoresToMerge);
15618         NumConsecutiveStores -= NumStoresToMerge;
15619       }
15620       continue;
15621     }
15622
15623     // Below we handle the case of multiple consecutive stores that
15624     // come from multiple consecutive loads. We merge them into a single
15625     // wide load and a single wide store.
15626
15627     // Look for load nodes which are used by the stored values.
15628     SmallVector<MemOpLink, 8> LoadNodes;
15629
15630     // Find acceptable loads. Loads need to have the same chain (token factor),
15631     // must not be zext, volatile, indexed, and they must be consecutive.
15632     BaseIndexOffset LdBasePtr;
15633
          // Collect the load feeding each store, stopping at the first load whose
          // base/index does not match that of the first load.
15634     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15635       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15636       SDValue Val = peekThroughBitcasts(St->getValue());
15637       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15638
15639       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15640       // If this is not the first ptr that we check.
15641       int64_t LdOffset = 0;
15642       if (LdBasePtr.getBase().getNode()) {
15643         // The base ptr must be the same.
15644         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15645           break;
15646       } else {
15647         // Check that all other base pointers are the same as this one.
15648         LdBasePtr = LdPtr;
15649       }
15650
15651       // We found a potential memory operand to merge.
15652       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15653     }
15654
15655     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15656       // If we have load/store pair instructions and we only have two values,
15657       // don't bother merging.
15658       unsigned RequiredAlignment;
15659       if (LoadNodes.size() == 2 &&
15660           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15661           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15662         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15663         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15664         break;
15665       }
15666       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15667       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15668       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15669       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15670       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15671
15672       // Scan the memory operations on the chain and find the first
15673       // non-consecutive load memory address. These variables hold the index in
15674       // the store node array.
15675
15676       unsigned LastConsecutiveLoad = 1;
15677
15678       // This variable refers to the size and not index in the array.
15679       unsigned LastLegalVectorType = 1;
15680       unsigned LastLegalIntegerType = 1;
15681       bool isDereferenceable = true;
15682       bool DoIntegerTruncate = false;
15683       StartAddress = LoadNodes[0].OffsetFromBase;
15684       SDValue FirstChain = FirstLoad->getChain();
15685       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15686         // All loads must share the same chain.
15687         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15688           break;
15689
15690         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15691         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15692           break;
15693         LastConsecutiveLoad = i;
15694
              // The merged load is only marked dereferenceable if every load
              // scanned so far is.
15695         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15696           isDereferenceable = false;
15697
15698         // Find a legal type for the vector store.
15699         unsigned Elts = (i + 1) * NumMemElts;
15700         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15701
15702         // Break early when size is too large to be legal.
15703         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15704           break;
15705
15706         bool IsFastSt, IsFastLd;
15707         if (TLI.isTypeLegal(StoreTy) &&
15708             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15709             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15710                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15711             IsFastSt &&
15712             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15713                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15714             IsFastLd) {
15715           LastLegalVectorType = i + 1;
15716         }
15717
15718         // Find a legal type for the integer store.
15719         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15720         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15721         if (TLI.isTypeLegal(StoreTy) &&
15722             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15723             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15724                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15725             IsFastSt &&
15726             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15727                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15728             IsFastLd) {
15729           LastLegalIntegerType = i + 1;
15730           DoIntegerTruncate = false;
15731           // Or check whether a truncstore and extload is legal.
15732         } else if (TLI.getTypeAction(Context, StoreTy) ==
15733                    TargetLowering::TypePromoteInteger) {
15734           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15735           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15736               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15737               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15738                                  StoreTy) &&
15739               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15740                                  StoreTy) &&
15741               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15742               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15743                                      *FirstInChain->getMemOperand(),
15744                                      &IsFastSt) &&
15745               IsFastSt &&
15746               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15747                                      *FirstLoad->getMemOperand(), &IsFastLd) &&
15748               IsFastLd) {
15749             LastLegalIntegerType = i + 1;
15750             DoIntegerTruncate = true;
15751           }
15752         }
15753       }
15754
15755       // Only use vector types if the vector type is larger than the integer
15756       // type. If they are the same, use integers.
15757       bool UseVectorTy =
15758           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15759       unsigned LastLegalType =
15760           std::max(LastLegalVectorType, LastLegalIntegerType);
15761
15762       // We add +1 here because the LastXXX variables refer to location while
15763       // the NumElem refers to array/index size.
15764       unsigned NumElem =
15765           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15766       NumElem = std::min(LastLegalType, NumElem);
15767
15768       if (NumElem < 2) {
15769         // We know that candidate stores are in order and of correct
15770         // shape. While there is no mergeable sequence from the
15771         // beginning one may start later in the sequence. The only
15772         // reason a merge of size N could have failed where another of
15773         // the same size would not have is if the alignment of either
15774         // the load or store has improved. Drop as many candidates as we
15775         // can here.
15776         unsigned NumSkip = 1;
15777         while ((NumSkip < LoadNodes.size()) &&
15778                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15779                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15780           NumSkip++;
15781         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15782         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15783         NumConsecutiveStores -= NumSkip;
15784         continue;
15785       }
15786
15787       // Check that we can merge these candidates without causing a cycle.
15788       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15789                                                     RootNode)) {
15790         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15791         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15792         NumConsecutiveStores -= NumElem;
15793         continue;
15794       }
15795
15796       // Find if it is better to use vectors or integers to load and store
15797       // to memory.
15798       EVT JointMemOpVT;
15799       if (UseVectorTy) {
15800         // Find a legal type for the vector store.
15801         unsigned Elts = NumElem * NumMemElts;
15802         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15803       } else {
15804         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15805         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15806       }
15807
15808       SDLoc LoadDL(LoadNodes[0].MemNode);
15809       SDLoc StoreDL(StoreNodes[0].MemNode);
15810
15811       // The merged loads are required to have the same incoming chain, so
15812       // using the first's chain is acceptable.
15813
15814       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15815       AddToWorklist(NewStoreChain.getNode());
15816
            // Carry the dereferenceable and non-temporal properties over to the
            // merged load and store.
15817       MachineMemOperand::Flags LdMMOFlags =
15818           isDereferenceable ? MachineMemOperand::MODereferenceable
15819                             : MachineMemOperand::MONone;
15820       if (IsNonTemporalLoad)
15821         LdMMOFlags |= MachineMemOperand::MONonTemporal;
15822
15823       MachineMemOperand::Flags StMMOFlags =
15824           IsNonTemporalStore ? MachineMemOperand::MONonTemporal
15825                              : MachineMemOperand::MONone;
15826
15827       SDValue NewLoad, NewStore;
15828       if (UseVectorTy || !DoIntegerTruncate) {
15829         NewLoad =
15830             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15831                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15832                         FirstLoadAlign, LdMMOFlags);
15833         NewStore = DAG.getStore(
15834             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15835             FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
15836       } else { // This must be the truncstore/extload case
15837         EVT ExtendedTy =
15838             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15839         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15840                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15841                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15842                                  FirstLoadAlign, LdMMOFlags);
              // NOTE(review): this path takes the store flags from FirstInChain's
              // memory operand instead of StMMOFlags as the branch above does --
              // confirm the difference is intentional.
15843         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15844                                      FirstInChain->getBasePtr(),
15845                                      FirstInChain->getPointerInfo(),
15846                                      JointMemOpVT, FirstInChain->getAlignment(),
15847                                      FirstInChain->getMemOperand()->getFlags());
15848       }
15849
15850       // Transfer chain users from old loads to the new load.
15851       for (unsigned i = 0; i < NumElem; ++i) {
15852         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15853         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15854                                       SDValue(NewLoad.getNode(), 1));
15855       }
15856
15857       // Replace all the stores with the new store. Recursively remove the
15858       // corresponding value if it's no longer used.
15859       for (unsigned i = 0; i < NumElem; ++i) {
15860         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15861         CombineTo(StoreNodes[i].MemNode, NewStore);
15862         if (Val.getNode()->use_empty())
15863           recursivelyDeleteUnusedNodes(Val.getNode());
15864       }
15865
15866       RV = true;
15867       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15868       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15869       NumConsecutiveStores -= NumElem;
15870     }
15871   }
15872   return RV;
15873 }
15874
15875 // Rebuild store ST on top of BetterChain and replace ST with a TokenFactor
15876 // that joins ST's original chain with the rebuilt store.
15877 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15878   SDLoc SL(ST);
15879   SDValue StoredVal = ST->getValue();
15880   SDValue BasePtr = ST->getBasePtr();
15881   auto *MMO = ST->getMemOperand();
15882
15883   // Re-issue the store on the better chain to avoid the dependency,
15884   // preserving whether the original was a truncating store.
15885   SDValue ReplStore =
15886       ST->isTruncatingStore()
15887           ? DAG.getTruncStore(BetterChain, SL, StoredVal, BasePtr,
15888                               ST->getMemoryVT(), MMO)
15889           : DAG.getStore(BetterChain, SL, StoredVal, BasePtr, MMO);
15890
15891   // Create token to keep both nodes around.
15892   SDValue Token = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
15893                               ST->getChain(), ReplStore);
15894   // Make sure the new and old chains are cleaned up.
15895   AddToWorklist(Token.getNode());
15896   // Don't add users to work list.
15897   return CombineTo(ST, Token, false);
15898 }
15899
15900 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15901   SDValue Value = ST->getValue();
15902   if (Value.getOpcode() == ISD::TargetConstantFP)
15903     return SDValue();
15904
15905   SDLoc DL(ST);
15906
15907   SDValue Chain = ST->getChain();
15908   SDValue Ptr = ST->getBasePtr();
15909
15910   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15911
15912   // NOTE: If the original store is volatile, this transform must not increase
15913   // the number of stores.  For example, on x86-32 an f64 can be stored in one
15914   // processor operation but an i64 (which is not legal) requires two.  So the
15915   // transform should not be done in this case.
15916
15917   SDValue Tmp;
15918   switch (CFP->getSimpleValueType(0).SimpleTy) {
15919   default:
15920     llvm_unreachable("Unknown FP type");
15921   case MVT::f16:    // We don't do this for these yet.
15922   case MVT::f80:
15923   case MVT::f128:
15924   case MVT::ppcf128:
15925     return SDValue();
15926   case MVT::f32:
15927     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
15928         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15929       ;
15930       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15931                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15932                             MVT::i32);
15933       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15934     }
15935
15936     return SDValue();
15937   case MVT::f64:
15938     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15939          ST->isSimple()) ||
15940         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15941       ;
15942       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15943                             getZExtValue(), SDLoc(CFP), MVT::i64);
15944       return DAG.getStore(Chain, DL, Tmp,
15945                           Ptr, ST->getMemOperand());
15946     }
15947
15948     if (ST->isSimple() &&
15949         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15950       // Many FP stores are not made apparent until after legalize, e.g. for
15951       // argument passing.  Since this is so common, custom legalize the
15952       // 64-bit integer store into two 32-bit stores.
15953       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15954       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15955       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15956       if (DAG.getDataLayout().isBigEndian())
15957         std::swap(Lo, Hi);
15958
15959       unsigned Alignment = ST->getAlignment();
15960       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15961       AAMDNodes AAInfo = ST->getAAInfo();
15962
15963       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15964                                  ST->getAlignment(), MMOFlags, AAInfo);
15965       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15966                         DAG.getConstant(4, DL, Ptr.getValueType()));
15967       Alignment = MinAlign(Alignment, 4U);
15968       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15969                                  ST->getPointerInfo().getWithOffset(4),
15970                                  Alignment, MMOFlags, AAInfo);
15971       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15972                          St0, St1);
15973     }
15974
15975     return SDValue();
15976   }
15977 }
15978
15979 SDValue DAGCombiner::visitSTORE(SDNode *N) {
        // Combine a STORE node. The folds below are attempted in order and the
        // function returns at the first one that applies: either a replacement
        // value (a new store, a TokenFactor, or the incoming chain when the store
        // is dead), SDValue(N, 0) when the DAG was already updated in place, or
        // an empty SDValue. If no earlier fold applies, the result of
        // ReduceLoadOpStoreWidth(N) is returned.
15980   StoreSDNode *ST  = cast<StoreSDNode>(N);
15981   SDValue Chain = ST->getChain();
15982   SDValue Value = ST->getValue();
15983   SDValue Ptr   = ST->getBasePtr();
15984
15985   // If this is a store of a bit convert, store the input value if the
15986   // resultant store does not need a higher alignment than the original.
15987   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15988       ST->isUnindexed()) {
15989     EVT SVT = Value.getOperand(0).getValueType();
15990     // If the store is volatile, we only want to change the store type if the
15991     // resulting store is legal. Otherwise we might increase the number of
15992     // memory accesses. We don't care if the original type was legal or not
15993     // as we assume software couldn't rely on the number of accesses of an
15994     // illegal type.
15995     // TODO: May be able to relax for unordered atomics (see D66309)
15996     if (((!LegalOperations && ST->isSimple()) ||
15997          TLI.isOperationLegal(ISD::STORE, SVT)) &&
15998         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
15999                                      DAG, *ST->getMemOperand())) {
16000       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
16001                           ST->getPointerInfo(), ST->getAlignment(),
16002                           ST->getMemOperand()->getFlags(), ST->getAAInfo());
16003     }
16004   }
16005
16006   // Turn 'store undef, Ptr' -> nothing.
16007   if (Value.isUndef() && ST->isUnindexed())
16008     return Chain;
16009
16010   // Try to infer better alignment information than the store already has.
16011   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
16012     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
16013       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
16014         SDValue NewStore =
16015             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
16016                               ST->getMemoryVT(), Align,
16017                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
16018         // NewStore will always be N as we are only refining the alignment
16019         assert(NewStore.getNode() == N);
16020         (void)NewStore;
16021       }
16022     }
16023   }
16024
16025   // Try transforming a pair floating point load / store ops to integer
16026   // load / store ops.
16027   if (SDValue NewST = TransformFPLoadStorePair(N))
16028     return NewST;
16029
16030   // Try transforming several stores into STORE (BSWAP).
16031   if (SDValue Store = MatchStoreCombine(ST))
16032     return Store;
16033
16034   if (ST->isUnindexed()) {
16035     // Walk up chain skipping non-aliasing memory nodes, on this store and any
16036     // adjacent stores.
16037     if (findBetterNeighborChains(ST)) {
16038       // replaceStoreChain uses CombineTo, which handled all of the worklist
16039       // manipulation. Return the original node to not do anything else.
16040       return SDValue(ST, 0);
16041     }
            // Re-read the chain operand from the node before continuing
            // (presumably it can have been updated above; not visible here).
16042     Chain = ST->getChain();
16043   }
16044
16045   // FIXME: is there such a thing as a truncating indexed store?
16046   if (ST->isTruncatingStore() && ST->isUnindexed() &&
16047       Value.getValueType().isInteger() &&
16048       (!isa<ConstantSDNode>(Value) ||
16049        !cast<ConstantSDNode>(Value)->isOpaque())) {
            // Only the low MemoryVT-sized bits of each scalar are written by a
            // truncating store, so only those bits of Value are demanded.
16050     APInt TruncDemandedBits =
16051         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
16052                              ST->getMemoryVT().getScalarSizeInBits());
16053
16054     // See if we can simplify the input to this truncstore with knowledge that
16055     // only the low bits are being used.  For example:
16056     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
16057     AddToWorklist(Value.getNode());
16058     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
16059       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
16060                                ST->getMemOperand());
16061
16062     // Otherwise, see if we can simplify the operation with
16063     // SimplifyDemandedBits, which only works if the value has a single use.
16064     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
16065       // Re-visit the store if anything changed and the store hasn't been merged
16066       // with another node (N is deleted) SimplifyDemandedBits will add Value's
16067       // node back to the worklist if necessary, but we also need to re-visit
16068       // the Store node itself.
16069       if (N->getOpcode() != ISD::DELETED_NODE)
16070         AddToWorklist(N);
16071       return SDValue(N, 0);
16072     }
16073   }
16074
16075   // If this is a load followed by a store to the same location, then the store
16076   // is dead/noop.
16077   // TODO: Can relax for unordered atomics (see D66309)
16078   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
16079     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
16080         ST->isUnindexed() && ST->isSimple() &&
16081         // There can't be any side effects between the load and store, such as
16082         // a call or store.
16083         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
16084       // The store is dead, remove it.
16085       return Chain;
16086     }
16087   }
16088
        // Folds involving a store (ST1) that is the direct chain predecessor of
        // this store; both stores must be unindexed and simple.
16089   // TODO: Can relax for unordered atomics (see D66309)
16090   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
16091     if (ST->isUnindexed() && ST->isSimple() &&
16092         ST1->isUnindexed() && ST1->isSimple()) {
16093       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
16094           ST->getMemoryVT() == ST1->getMemoryVT()) {
16095         // If this is a store followed by a store with the same value to the
16096         // same location, then the store is dead/noop.
16097         return Chain;
16098       }
16099
16100       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
16101           !ST1->getBasePtr().isUndef()) {
16102         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
16103         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
16104         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
16105         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
16106         // If the preceding store (ST1) writes to a subset of the location
16107         // written by this store and no other node is chained to ST1, we can
16108         // effectively drop ST1. Do not remove stores to undef as they may
16109         // be used as data sinks.
16110         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
                  // ST1 is replaced by its own incoming chain; N itself is left
                  // as it is and an empty SDValue is returned for it.
16111           CombineTo(ST1, ST1->getChain());
16112           return SDValue();
16113         }
16114
16115         // If ST stores to a subset of preceding store's write set, we may be
16116         // able to fold ST's value into the preceding stored value. As we know
16117         // the other uses of ST1's chain are unconcerned with ST, this folding
16118         // will not affect those nodes.
16119         int64_t BitOffset;
16120         if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
16121                                BitOffset)) {
16122           SDValue ChainValue = ST1->getValue();
                  // Only handled when both stored values are integer constants:
                  // ST's constant is inserted into ST1's constant at BitOffset.
16123           if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
16124             if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
16125               APInt Val = C1->getAPIntValue();
16126               APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
16127               // FIXME: Handle Big-endian mode.
16128               if (!DAG.getDataLayout().isBigEndian()) {
16129                 Val.insertBits(InsertVal, BitOffset);
16130                 SDValue NewSDVal =
16131                     DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
16132                                     C1->isTargetOpcode(), C1->isOpaque());
                        // Update ST1 in place to store the merged constant and
                        // replace this store with it.
16133                 SDNode *NewST1 = DAG.UpdateNodeOperands(
16134                     ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
16135                     ST1->getOperand(3));
16136                 return CombineTo(ST, SDValue(NewST1, 0));
16137               }
16138             }
16139           }
16140         } // End ST subset of ST1 case.
16141       }
16142     }
16143   }
16144
16145   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
16146   // truncating store.  We can do this even if this is already a truncstore.
16147   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
16148       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
16149       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
16150                             ST->getMemoryVT())) {
16151     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
16152                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
16153   }
16154
16155   // Always perform this optimization before types are legal. If the target
16156   // prefers, also try this after legalization to catch stores that were created
16157   // by intrinsics or other nodes.
16158   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
16159     while (true) {
16160       // There can be multiple store sequences on the same chain.
16161       // Keep trying to merge store sequences until we are unable to do so
16162       // or until we merge the last store on the chain.
16163       bool Changed = MergeConsecutiveStores(ST);
16164       if (!Changed) break;
16165       // Return N as merge only uses CombineTo and no worklist clean
16166       // up is necessary.
16167       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
16168         return SDValue(N, 0);
16169     }
16170   }
16171
16172   // Try transforming N to an indexed store.
16173   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16174     return SDValue(N, 0);
16175
16176   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
16177   //
16178   // Make sure to do this only after attempting to merge stores in order to
16179   //  avoid changing the types of some subset of stores due to visit order,
16180   //  preventing their merging.
16181   if (isa<ConstantFPSDNode>(ST->getValue())) {
16182     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
16183       return NewSt;
16184   }
16185
        // Try splitting a store of two values merged with or/shl/zext into two
        // narrower stores.
16186   if (SDValue NewSt = splitMergedValStore(ST))
16187     return NewSt;
16188
        // Last resort: whatever ReduceLoadOpStoreWidth produces (possibly an
        // empty SDValue) is the result of this visit.
16189   return ReduceLoadOpStoreWidth(N);
16190 }
16191
16192 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16193   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16194   if (!LifetimeEnd->hasOffset())
16195     return SDValue();
16196
16197   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16198                                         LifetimeEnd->getOffset(), false);
16199
16200   // We walk up the chains to find stores.
16201   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16202   while (!Chains.empty()) {
16203     SDValue Chain = Chains.back();
16204     Chains.pop_back();
16205     if (!Chain.hasOneUse())
16206       continue;
16207     switch (Chain.getOpcode()) {
16208     case ISD::TokenFactor:
16209       for (unsigned Nops = Chain.getNumOperands(); Nops;)
16210         Chains.push_back(Chain.getOperand(--Nops));
16211       break;
16212     case ISD::LIFETIME_START:
16213     case ISD::LIFETIME_END:
16214       // We can forward past any lifetime start/end that can be proven not to
16215       // alias the node.
16216       if (!isAlias(Chain.getNode(), N))
16217         Chains.push_back(Chain.getOperand(0));
16218       break;
16219     case ISD::STORE: {
16220       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16221       // TODO: Can relax for unordered atomics (see D66309)
16222       if (!ST->isSimple() || ST->isIndexed())
16223         continue;
16224       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16225       // If we store purely within object bounds just before its lifetime ends,
16226       // we can remove the store.
16227       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16228                                    ST->getMemoryVT().getStoreSizeInBits())) {
16229         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16230                    dbgs() << "\nwithin LIFETIME_END of : ";
16231                    LifetimeEndBase.dump(); dbgs() << "\n");
16232         CombineTo(ST, ST->getChain());
16233         return SDValue(N, 0);
16234       }
16235     }
16236     }
16237   }
16238   return SDValue();
16239 }
16240
16241 /// For the instruction sequence of store below, F and I values
16242 /// are bundled together as an i64 value before being stored into memory.
16243 /// Sometimes it is more efficent to generate separate stores for F and I,
16244 /// which can remove the bitwise instructions or sink them to colder places.
16245 ///
16246 ///   (store (or (zext (bitcast F to i32) to i64),
16247 ///              (shl (zext I to i64), 32)), addr)  -->
16248 ///   (store F, addr) and (store I, addr+4)
16249 ///
16250 /// Similarly, splitting for other merged store can also be beneficial, like:
16251 /// For pair of {i32, i32}, i64 store --> two i32 stores.
16252 /// For pair of {i32, i16}, i64 store --> two i32 stores.
16253 /// For pair of {i16, i16}, i32 store --> two i16 stores.
16254 /// For pair of {i16, i8},  i32 store --> two i16 stores.
16255 /// For pair of {i8, i8},   i16 store --> two i8 stores.
16256 ///
16257 /// We allow each target to determine specifically which kind of splitting is
16258 /// supported.
16259 ///
16260 /// The store patterns are commonly seen from the simple code snippet below
16261 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16262 ///   void goo(const std::pair<int, float> &);
16263 ///   hoo() {
16264 ///     ...
16265 ///     goo(std::make_pair(tmp, ftmp));
16266 ///     ...
16267 ///   }
16268 ///
16269 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16270   if (OptLevel == CodeGenOpt::None)
16271     return SDValue();
16272
16273   SDValue Val = ST->getValue();
16274   SDLoc DL(ST);
16275
16276   // Match OR operand.
16277   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16278     return SDValue();
16279
16280   // Match SHL operand and get Lower and Higher parts of Val.
16281   SDValue Op1 = Val.getOperand(0);
16282   SDValue Op2 = Val.getOperand(1);
16283   SDValue Lo, Hi;
16284   if (Op1.getOpcode() != ISD::SHL) {
16285     std::swap(Op1, Op2);
16286     if (Op1.getOpcode() != ISD::SHL)
16287       return SDValue();
16288   }
16289   Lo = Op2;
16290   Hi = Op1.getOperand(0);
16291   if (!Op1.hasOneUse())
16292     return SDValue();
16293
16294   // Match shift amount to HalfValBitSize.
16295   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16296   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16297   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16298     return SDValue();
16299
16300   // Lo and Hi are zero-extended from int with size less equal than 32
16301   // to i64.
16302   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16303       !Lo.getOperand(0).getValueType().isScalarInteger() ||
16304       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16305       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16306       !Hi.getOperand(0).getValueType().isScalarInteger() ||
16307       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16308     return SDValue();
16309
16310   // Use the EVT of low and high parts before bitcast as the input
16311   // of target query.
16312   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16313                   ? Lo.getOperand(0).getValueType()
16314                   : Lo.getValueType();
16315   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16316                    ? Hi.getOperand(0).getValueType()
16317                    : Hi.getValueType();
16318   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16319     return SDValue();
16320
16321   // Start to split store.
16322   unsigned Alignment = ST->getAlignment();
16323   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16324   AAMDNodes AAInfo = ST->getAAInfo();
16325
16326   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16327   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16328   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16329   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16330
16331   SDValue Chain = ST->getChain();
16332   SDValue Ptr = ST->getBasePtr();
16333   // Lower value store.
16334   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16335                              ST->getAlignment(), MMOFlags, AAInfo);
16336   Ptr =
16337       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16338                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16339   // Higher value store.
16340   SDValue St1 =
16341       DAG.getStore(St0, DL, Hi, Ptr,
16342                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16343                    Alignment / 2, MMOFlags, AAInfo);
16344   return St1;
16345 }
16346
16347 /// Convert a disguised subvector insertion into a shuffle:
16348 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16349   SDValue InsertVal = N->getOperand(1);
16350   SDValue Vec = N->getOperand(0);
16351
16352   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
16353   //   --> (vector_shuffle X, Y)
16354   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
16355       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16356       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
16357     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
16358     ArrayRef<int> Mask = SVN->getMask();
16359
16360     SDValue X = Vec.getOperand(0);
16361     SDValue Y = Vec.getOperand(1);
16362
16363     // Vec's operand 0 is using indices from 0 to N-1 and
16364     // operand 1 from N to 2N - 1, where N is the number of
16365     // elements in the vectors.
16366     int XOffset = -1;
16367     if (InsertVal.getOperand(0) == X) {
16368       XOffset = 0;
16369     } else if (InsertVal.getOperand(0) == Y) {
16370       XOffset = X.getValueType().getVectorNumElements();
16371     }
16372
16373     if (XOffset != -1) {
16374       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
16375
16376       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
16377       NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
16378       assert(NewMask[InsIndex] <
16379                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
16380              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
16381
16382       SDValue LegalShuffle =
16383               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
16384                                           Y, NewMask, DAG);
16385       if (LegalShuffle)
16386         return LegalShuffle;
16387     }
16388   }
16389
16390   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
16391   // bitcast(shuffle (bitcast V), (extended X), Mask)
16392   // Note: We do not use an insert_subvector node because that requires a
16393   // legal subvector type.
16394   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16395       !InsertVal.getOperand(0).getValueType().isVector())
16396     return SDValue();
16397
16398   SDValue SubVec = InsertVal.getOperand(0);
16399   SDValue DestVec = N->getOperand(0);
16400   EVT SubVecVT = SubVec.getValueType();
16401   EVT VT = DestVec.getValueType();
16402   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16403   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16404   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16405
16406   // Step 1: Create a shuffle mask that implements this insert operation. The
16407   // vector that we are inserting into will be operand 0 of the shuffle, so
16408   // those elements are just 'i'. The inserted subvector is in the first
16409   // positions of operand 1 of the shuffle. Example:
16410   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16411   SmallVector<int, 16> Mask(NumMaskVals);
16412   for (unsigned i = 0; i != NumMaskVals; ++i) {
16413     if (i / NumSrcElts == InsIndex)
16414       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16415     else
16416       Mask[i] = i;
16417   }
16418
16419   // Bail out if the target can not handle the shuffle we want to create.
16420   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16421   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16422   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16423     return SDValue();
16424
16425   // Step 2: Create a wide vector from the inserted source vector by appending
16426   // undefined elements. This is the same size as our destination vector.
16427   SDLoc DL(N);
16428   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16429   ConcatOps[0] = SubVec;
16430   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16431
16432   // Step 3: Shuffle in the padded subvector.
16433   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16434   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16435   AddToWorklist(PaddedSubV.getNode());
16436   AddToWorklist(DestVecBC.getNode());
16437   AddToWorklist(Shuf.getNode());
16438   return DAG.getBitcast(VT, Shuf);
16439 }
16440
16441 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16442   SDValue InVec = N->getOperand(0);
16443   SDValue InVal = N->getOperand(1);
16444   SDValue EltNo = N->getOperand(2);
16445   SDLoc DL(N);
16446
16447   // If the inserted element is an UNDEF, just use the input vector.
16448   if (InVal.isUndef())
16449     return InVec;
16450
16451   EVT VT = InVec.getValueType();
16452   unsigned NumElts = VT.getVectorNumElements();
16453
16454   // Remove redundant insertions:
16455   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16456   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16457       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16458     return InVec;
16459
16460   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16461   if (!IndexC) {
16462     // If this is variable insert to undef vector, it might be better to splat:
16463     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16464     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16465       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16466       return DAG.getBuildVector(VT, DL, Ops);
16467     }
16468     return SDValue();
16469   }
16470
16471   // We must know which element is being inserted for folds below here.
16472   unsigned Elt = IndexC->getZExtValue();
16473   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16474     return Shuf;
16475
16476   // Canonicalize insert_vector_elt dag nodes.
16477   // Example:
16478   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16479   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16480   //
16481   // Do this only if the child insert_vector node has one use; also
16482   // do this only if indices are both constants and Idx1 < Idx0.
16483   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16484       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16485     unsigned OtherElt = InVec.getConstantOperandVal(2);
16486     if (Elt < OtherElt) {
16487       // Swap nodes.
16488       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16489                                   InVec.getOperand(0), InVal, EltNo);
16490       AddToWorklist(NewOp.getNode());
16491       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16492                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16493     }
16494   }
16495
16496   // If we can't generate a legal BUILD_VECTOR, exit
16497   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16498     return SDValue();
16499
16500   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16501   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16502   // vector elements.
16503   SmallVector<SDValue, 8> Ops;
16504   // Do not combine these two vectors if the output vector will not replace
16505   // the input vector.
16506   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16507     Ops.append(InVec.getNode()->op_begin(),
16508                InVec.getNode()->op_end());
16509   } else if (InVec.isUndef()) {
16510     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16511   } else {
16512     return SDValue();
16513   }
16514   assert(Ops.size() == NumElts && "Unexpected vector size");
16515
16516   // Insert the element
16517   if (Elt < Ops.size()) {
16518     // All the operands of BUILD_VECTOR must have the same type;
16519     // we enforce that here.
16520     EVT OpVT = Ops[0].getValueType();
16521     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16522   }
16523
16524   // Return the new vector
16525   return DAG.getBuildVector(VT, DL, Ops);
16526 }
16527
16528 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16529                                                   SDValue EltNo,
16530                                                   LoadSDNode *OriginalLoad) {
16531   assert(OriginalLoad->isSimple());
16532
16533   EVT ResultVT = EVE->getValueType(0);
16534   EVT VecEltVT = InVecVT.getVectorElementType();
16535   unsigned Align = OriginalLoad->getAlignment();
16536   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16537       VecEltVT.getTypeForEVT(*DAG.getContext()));
16538
16539   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16540     return SDValue();
16541
16542   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16543     ISD::NON_EXTLOAD : ISD::EXTLOAD;
16544   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16545     return SDValue();
16546
16547   Align = NewAlign;
16548
16549   SDValue NewPtr = OriginalLoad->getBasePtr();
16550   SDValue Offset;
16551   EVT PtrType = NewPtr.getValueType();
16552   MachinePointerInfo MPI;
16553   SDLoc DL(EVE);
16554   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16555     int Elt = ConstEltNo->getZExtValue();
16556     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16557     Offset = DAG.getConstant(PtrOff, DL, PtrType);
16558     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16559   } else {
16560     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16561     Offset = DAG.getNode(
16562         ISD::MUL, DL, PtrType, Offset,
16563         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16564     // Discard the pointer info except the address space because the memory
16565     // operand can't represent this new access since the offset is variable.
16566     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16567   }
16568   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16569
16570   // The replacement we need to do here is a little tricky: we need to
16571   // replace an extractelement of a load with a load.
16572   // Use ReplaceAllUsesOfValuesWith to do the replacement.
16573   // Note that this replacement assumes that the extractvalue is the only
16574   // use of the load; that's okay because we don't want to perform this
16575   // transformation in other cases anyway.
16576   SDValue Load;
16577   SDValue Chain;
16578   if (ResultVT.bitsGT(VecEltVT)) {
16579     // If the result type of vextract is wider than the load, then issue an
16580     // extending load instead.
16581     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16582                                                   VecEltVT)
16583                                    ? ISD::ZEXTLOAD
16584                                    : ISD::EXTLOAD;
16585     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16586                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16587                           Align, OriginalLoad->getMemOperand()->getFlags(),
16588                           OriginalLoad->getAAInfo());
16589     Chain = Load.getValue(1);
16590   } else {
16591     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16592                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16593                        OriginalLoad->getAAInfo());
16594     Chain = Load.getValue(1);
16595     if (ResultVT.bitsLT(VecEltVT))
16596       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16597     else
16598       Load = DAG.getBitcast(ResultVT, Load);
16599   }
16600   WorklistRemover DeadNodes(*this);
16601   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16602   SDValue To[] = { Load, Chain };
16603   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16604   // Make sure to revisit this node to clean it up; it will usually be dead.
16605   AddToWorklist(EVE);
16606   // Since we're explicitly calling ReplaceAllUses, add the new node to the
16607   // worklist explicitly as well.
16608   AddUsersToWorklist(Load.getNode()); // Add users too
16609   AddToWorklist(Load.getNode());
16610   ++OpsNarrowed;
16611   return SDValue(EVE, 0);
16612 }
16613
16614 /// Transform a vector binary operation into a scalar binary operation by moving
16615 /// the math/logic after an extract element of a vector.
16616 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16617                                        bool LegalOperations) {
16618   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16619   SDValue Vec = ExtElt->getOperand(0);
16620   SDValue Index = ExtElt->getOperand(1);
16621   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16622   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16623       Vec.getNode()->getNumValues() != 1)
16624     return SDValue();
16625
16626   // Targets may want to avoid this to prevent an expensive register transfer.
16627   if (!TLI.shouldScalarizeBinop(Vec))
16628     return SDValue();
16629
16630   // Extracting an element of a vector constant is constant-folded, so this
16631   // transform is just replacing a vector op with a scalar op while moving the
16632   // extract.
16633   SDValue Op0 = Vec.getOperand(0);
16634   SDValue Op1 = Vec.getOperand(1);
16635   if (isAnyConstantBuildVector(Op0, true) ||
16636       isAnyConstantBuildVector(Op1, true)) {
16637     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16638     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16639     SDLoc DL(ExtElt);
16640     EVT VT = ExtElt->getValueType(0);
16641     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16642     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16643     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16644   }
16645
16646   return SDValue();
16647 }
16648
16649 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16650   SDValue VecOp = N->getOperand(0);
16651   SDValue Index = N->getOperand(1);
16652   EVT ScalarVT = N->getValueType(0);
16653   EVT VecVT = VecOp.getValueType();
16654   if (VecOp.isUndef())
16655     return DAG.getUNDEF(ScalarVT);
16656
16657   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16658   //
16659   // This only really matters if the index is non-constant since other combines
16660   // on the constant elements already work.
16661   SDLoc DL(N);
16662   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16663       Index == VecOp.getOperand(2)) {
16664     SDValue Elt = VecOp.getOperand(1);
16665     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16666   }
16667
16668   // (vextract (scalar_to_vector val, 0) -> val
16669   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16670     // Check if the result type doesn't match the inserted element type. A
16671     // SCALAR_TO_VECTOR may truncate the inserted element and the
16672     // EXTRACT_VECTOR_ELT may widen the extracted vector.
16673     SDValue InOp = VecOp.getOperand(0);
16674     if (InOp.getValueType() != ScalarVT) {
16675       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16676       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16677     }
16678     return InOp;
16679   }
16680
16681   // extract_vector_elt of out-of-bounds element -> UNDEF
16682   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16683   unsigned NumElts = VecVT.getVectorNumElements();
16684   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16685     return DAG.getUNDEF(ScalarVT);
16686
16687   // extract_vector_elt (build_vector x, y), 1 -> y
16688   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16689       TLI.isTypeLegal(VecVT) &&
16690       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16691     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16692     EVT InEltVT = Elt.getValueType();
16693
16694     // Sometimes build_vector's scalar input types do not match result type.
16695     if (ScalarVT == InEltVT)
16696       return Elt;
16697
16698     // TODO: It may be useful to truncate if free if the build_vector implicitly
16699     // converts.
16700   }
16701
16702   // TODO: These transforms should not require the 'hasOneUse' restriction, but
16703   // there are regressions on multiple targets without it. We can end up with a
16704   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16705   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16706       VecOp.hasOneUse()) {
16707     // The vector index of the LSBs of the source depend on the endian-ness.
16708     bool IsLE = DAG.getDataLayout().isLittleEndian();
16709     unsigned ExtractIndex = IndexC->getZExtValue();
16710     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16711     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16712     SDValue BCSrc = VecOp.getOperand(0);
16713     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16714       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16715
16716     if (LegalTypes && BCSrc.getValueType().isInteger() &&
16717         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16718       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16719       // trunc i64 X to i32
16720       SDValue X = BCSrc.getOperand(0);
16721       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16722              "Extract element and scalar to vector can't change element type "
16723              "from FP to integer.");
16724       unsigned XBitWidth = X.getValueSizeInBits();
16725       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16726       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16727
16728       // An extract element return value type can be wider than its vector
16729       // operand element type. In that case, the high bits are undefined, so
16730       // it's possible that we may need to extend rather than truncate.
16731       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16732         assert(XBitWidth % VecEltBitWidth == 0 &&
16733                "Scalar bitwidth must be a multiple of vector element bitwidth");
16734         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16735       }
16736     }
16737   }
16738
16739   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16740     return BO;
16741
16742   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16743   // We only perform this optimization before the op legalization phase because
16744   // we may introduce new vector instructions which are not backed by TD
16745   // patterns. For example on AVX, extracting elements from a wide vector
16746   // without using extract_subvector. However, if we can find an underlying
16747   // scalar value, then we can always use that.
16748   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16749     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16750     // Find the new index to extract from.
16751     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16752
16753     // Extracting an undef index is undef.
16754     if (OrigElt == -1)
16755       return DAG.getUNDEF(ScalarVT);
16756
16757     // Select the right vector half to extract from.
16758     SDValue SVInVec;
16759     if (OrigElt < (int)NumElts) {
16760       SVInVec = VecOp.getOperand(0);
16761     } else {
16762       SVInVec = VecOp.getOperand(1);
16763       OrigElt -= NumElts;
16764     }
16765
16766     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16767       SDValue InOp = SVInVec.getOperand(OrigElt);
16768       if (InOp.getValueType() != ScalarVT) {
16769         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16770         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16771       }
16772
16773       return InOp;
16774     }
16775
16776     // FIXME: We should handle recursing on other vector shuffles and
16777     // scalar_to_vector here as well.
16778
16779     if (!LegalOperations ||
16780         // FIXME: Should really be just isOperationLegalOrCustom.
16781         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16782         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16783       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16784       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16785                          DAG.getConstant(OrigElt, DL, IndexTy));
16786     }
16787   }
16788
16789   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16790   // simplify it based on the (valid) extraction indices.
16791   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16792         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16793                Use->getOperand(0) == VecOp &&
16794                isa<ConstantSDNode>(Use->getOperand(1));
16795       })) {
16796     APInt DemandedElts = APInt::getNullValue(NumElts);
16797     for (SDNode *Use : VecOp->uses()) {
16798       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16799       if (CstElt->getAPIntValue().ult(NumElts))
16800         DemandedElts.setBit(CstElt->getZExtValue());
16801     }
16802     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16803       // We simplified the vector operand of this extract element. If this
16804       // extract is not dead, visit it again so it is folded properly.
16805       if (N->getOpcode() != ISD::DELETED_NODE)
16806         AddToWorklist(N);
16807       return SDValue(N, 0);
16808     }
16809   }
16810
16811   // Everything under here is trying to match an extract of a loaded value.
16812   // If the result of load has to be truncated, then it's not necessarily
16813   // profitable.
16814   bool BCNumEltsChanged = false;
16815   EVT ExtVT = VecVT.getVectorElementType();
16816   EVT LVT = ExtVT;
16817   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16818     return SDValue();
16819
16820   if (VecOp.getOpcode() == ISD::BITCAST) {
16821     // Don't duplicate a load with other uses.
16822     if (!VecOp.hasOneUse())
16823       return SDValue();
16824
16825     EVT BCVT = VecOp.getOperand(0).getValueType();
16826     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16827       return SDValue();
16828     if (NumElts != BCVT.getVectorNumElements())
16829       BCNumEltsChanged = true;
16830     VecOp = VecOp.getOperand(0);
16831     ExtVT = BCVT.getVectorElementType();
16832   }
16833
16834   // extract (vector load $addr), i --> load $addr + i * size
16835   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16836       ISD::isNormalLoad(VecOp.getNode()) &&
16837       !Index->hasPredecessor(VecOp.getNode())) {
16838     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16839     if (VecLoad && VecLoad->isSimple())
16840       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16841   }
16842
16843   // Perform only after legalization to ensure build_vector / vector_shuffle
16844   // optimizations have already been done.
16845   if (!LegalOperations || !IndexC)
16846     return SDValue();
16847
16848   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16849   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16850   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16851   int Elt = IndexC->getZExtValue();
16852   LoadSDNode *LN0 = nullptr;
16853   if (ISD::isNormalLoad(VecOp.getNode())) {
16854     LN0 = cast<LoadSDNode>(VecOp);
16855   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16856              VecOp.getOperand(0).getValueType() == ExtVT &&
16857              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16858     // Don't duplicate a load with other uses.
16859     if (!VecOp.hasOneUse())
16860       return SDValue();
16861
16862     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16863   }
16864   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16865     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16866     // =>
16867     // (load $addr+1*size)
16868
16869     // Don't duplicate a load with other uses.
16870     if (!VecOp.hasOneUse())
16871       return SDValue();
16872
16873     // If the bit convert changed the number of elements, it is unsafe
16874     // to examine the mask.
16875     if (BCNumEltsChanged)
16876       return SDValue();
16877
16878     // Select the input vector, guarding against out of range extract vector.
16879     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16880     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16881
16882     if (VecOp.getOpcode() == ISD::BITCAST) {
16883       // Don't duplicate a load with other uses.
16884       if (!VecOp.hasOneUse())
16885         return SDValue();
16886
16887       VecOp = VecOp.getOperand(0);
16888     }
16889     if (ISD::isNormalLoad(VecOp.getNode())) {
16890       LN0 = cast<LoadSDNode>(VecOp);
16891       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16892       Index = DAG.getConstant(Elt, DL, Index.getValueType());
16893     }
16894   }
16895
16896   // Make sure we found a non-volatile load and the extractelement is
16897   // the only use.
16898   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
16899     return SDValue();
16900
16901   // If Idx was -1 above, Elt is going to be -1, so just return undef.
16902   if (Elt == -1)
16903     return DAG.getUNDEF(LVT);
16904
16905   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16906 }
16907
16908 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
16909 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16910   // We perform this optimization post type-legalization because
16911   // the type-legalizer often scalarizes integer-promoted vectors.
16912   // Performing this optimization before may create bit-casts which
16913   // will be type-legalized to complex code sequences.
16914   // We perform this optimization only before the operation legalizer because we
16915   // may introduce illegal operations.
16916   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16917     return SDValue();
16918
16919   unsigned NumInScalars = N->getNumOperands();
16920   SDLoc DL(N);
16921   EVT VT = N->getValueType(0);
16922
16923   // Check to see if this is a BUILD_VECTOR of a bunch of values
16924   // which come from any_extend or zero_extend nodes. If so, we can create
16925   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16926   // optimizations. We do not handle sign-extend because we can't fill the sign
16927   // using shuffles.
16928   EVT SourceType = MVT::Other;
16929   bool AllAnyExt = true;
16930
16931   for (unsigned i = 0; i != NumInScalars; ++i) {
16932     SDValue In = N->getOperand(i);
16933     // Ignore undef inputs.
16934     if (In.isUndef()) continue;
16935
16936     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
16937     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16938
16939     // Abort if the element is not an extension.
16940     if (!ZeroExt && !AnyExt) {
16941       SourceType = MVT::Other;
16942       break;
16943     }
16944
16945     // The input is a ZeroExt or AnyExt. Check the original type.
16946     EVT InTy = In.getOperand(0).getValueType();
16947
16948     // Check that all of the widened source types are the same.
16949     if (SourceType == MVT::Other)
16950       // First time.
16951       SourceType = InTy;
16952     else if (InTy != SourceType) {
16953       // Multiple income types. Abort.
16954       SourceType = MVT::Other;
16955       break;
16956     }
16957
16958     // Check if all of the extends are ANY_EXTENDs.
16959     AllAnyExt &= AnyExt;
16960   }
16961
16962   // In order to have valid types, all of the inputs must be extended from the
16963   // same source type and all of the inputs must be any or zero extend.
16964   // Scalar sizes must be a power of two.
16965   EVT OutScalarTy = VT.getScalarType();
16966   bool ValidTypes = SourceType != MVT::Other &&
16967                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16968                  isPowerOf2_32(SourceType.getSizeInBits());
16969
16970   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16971   // turn into a single shuffle instruction.
16972   if (!ValidTypes)
16973     return SDValue();
16974
16975   bool isLE = DAG.getDataLayout().isLittleEndian();
16976   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16977   assert(ElemRatio > 1 && "Invalid element size ratio");
16978   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16979                                DAG.getConstant(0, DL, SourceType);
16980
16981   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16982   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16983
16984   // Populate the new build_vector
16985   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16986     SDValue Cast = N->getOperand(i);
16987     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16988             Cast.getOpcode() == ISD::ZERO_EXTEND ||
16989             Cast.isUndef()) && "Invalid cast opcode");
16990     SDValue In;
16991     if (Cast.isUndef())
16992       In = DAG.getUNDEF(SourceType);
16993     else
16994       In = Cast->getOperand(0);
16995     unsigned Index = isLE ? (i * ElemRatio) :
16996                             (i * ElemRatio + (ElemRatio - 1));
16997
16998     assert(Index < Ops.size() && "Invalid index");
16999     Ops[Index] = In;
17000   }
17001
17002   // The type of the new BUILD_VECTOR node.
17003   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17004   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17005          "Invalid vector size");
17006   // Check if the new vector type is legal.
17007   if (!isTypeLegal(VecVT) ||
17008       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17009        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17010     return SDValue();
17011
17012   // Make the new BUILD_VECTOR.
17013   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17014
17015   // The new BUILD_VECTOR node has the potential to be further optimized.
17016   AddToWorklist(BV.getNode());
17017   // Bitcast to the desired type.
17018   return DAG.getBitcast(VT, BV);
17019 }
17020
17021 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17022                                            ArrayRef<int> VectorMask,
17023                                            SDValue VecIn1, SDValue VecIn2,
17024                                            unsigned LeftIdx, bool DidSplitVec) {
17025   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17026   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
17027
17028   EVT VT = N->getValueType(0);
17029   EVT InVT1 = VecIn1.getValueType();
17030   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17031
17032   unsigned NumElems = VT.getVectorNumElements();
17033   unsigned ShuffleNumElems = NumElems;
17034
17035   // If we artificially split a vector in two already, then the offsets in the
17036   // operands will all be based off of VecIn1, even those in VecIn2.
17037   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17038
17039   // We can't generate a shuffle node with mismatched input and output types.
17040   // Try to make the types match the type of the output.
17041   if (InVT1 != VT || InVT2 != VT) {
17042     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17043       // If the output vector length is a multiple of both input lengths,
17044       // we can concatenate them and pad the rest with undefs.
17045       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17046       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17047       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17048       ConcatOps[0] = VecIn1;
17049       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17050       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17051       VecIn2 = SDValue();
17052     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17053       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17054         return SDValue();
17055
17056       if (!VecIn2.getNode()) {
17057         // If we only have one input vector, and it's twice the size of the
17058         // output, split it in two.
17059         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17060                              DAG.getConstant(NumElems, DL, IdxTy));
17061         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17062         // Since we now have shorter input vectors, adjust the offset of the
17063         // second vector's start.
17064         Vec2Offset = NumElems;
17065       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17066         // VecIn1 is wider than the output, and we have another, possibly
17067         // smaller input. Pad the smaller input with undefs, shuffle at the
17068         // input vector width, and extract the output.
17069         // The shuffle type is different than VT, so check legality again.
17070         if (LegalOperations &&
17071             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
17072           return SDValue();
17073
17074         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17075         // lower it back into a BUILD_VECTOR. So if the inserted type is
17076         // illegal, don't even try.
17077         if (InVT1 != InVT2) {
17078           if (!TLI.isTypeLegal(InVT2))
17079             return SDValue();
17080           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17081                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17082         }
17083         ShuffleNumElems = NumElems * 2;
17084       } else {
17085         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17086         // than VecIn1. We can't handle this for now - this case will disappear
17087         // when we start sorting the vectors by type.
17088         return SDValue();
17089       }
17090     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17091                InVT1.getSizeInBits() == VT.getSizeInBits()) {
17092       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17093       ConcatOps[0] = VecIn2;
17094       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17095     } else {
17096       // TODO: Support cases where the length mismatch isn't exactly by a
17097       // factor of 2.
17098       // TODO: Move this check upwards, so that if we have bad type
17099       // mismatches, we don't create any DAG nodes.
17100       return SDValue();
17101     }
17102   }
17103
17104   // Initialize mask to undef.
17105   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17106
17107   // Only need to run up to the number of elements actually used, not the
17108   // total number of elements in the shuffle - if we are shuffling a wider
17109   // vector, the high lanes should be set to undef.
17110   for (unsigned i = 0; i != NumElems; ++i) {
17111     if (VectorMask[i] <= 0)
17112       continue;
17113
17114     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17115     if (VectorMask[i] == (int)LeftIdx) {
17116       Mask[i] = ExtIndex;
17117     } else if (VectorMask[i] == (int)LeftIdx + 1) {
17118       Mask[i] = Vec2Offset + ExtIndex;
17119     }
17120   }
17121
17122   // The type the input vectors may have changed above.
17123   InVT1 = VecIn1.getValueType();
17124
17125   // If we already have a VecIn2, it should have the same type as VecIn1.
17126   // If we don't, get an undef/zero vector of the appropriate type.
17127   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17128   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17129
17130   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17131   if (ShuffleNumElems > NumElems)
17132     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17133
17134   return Shuffle;
17135 }
17136
17137 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17138   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17139
17140   // First, determine where the build vector is not undef.
17141   // TODO: We could extend this to handle zero elements as well as undefs.
17142   int NumBVOps = BV->getNumOperands();
17143   int ZextElt = -1;
17144   for (int i = 0; i != NumBVOps; ++i) {
17145     SDValue Op = BV->getOperand(i);
17146     if (Op.isUndef())
17147       continue;
17148     if (ZextElt == -1)
17149       ZextElt = i;
17150     else
17151       return SDValue();
17152   }
17153   // Bail out if there's no non-undef element.
17154   if (ZextElt == -1)
17155     return SDValue();
17156
17157   // The build vector contains some number of undef elements and exactly
17158   // one other element. That other element must be a zero-extended scalar
17159   // extracted from a vector at a constant index to turn this into a shuffle.
17160   // Also, require that the build vector does not implicitly truncate/extend
17161   // its elements.
17162   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17163   EVT VT = BV->getValueType(0);
17164   SDValue Zext = BV->getOperand(ZextElt);
17165   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17166       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17167       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17168       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17169     return SDValue();
17170
17171   // The zero-extend must be a multiple of the source size, and we must be
17172   // building a vector of the same size as the source of the extract element.
17173   SDValue Extract = Zext.getOperand(0);
17174   unsigned DestSize = Zext.getValueSizeInBits();
17175   unsigned SrcSize = Extract.getValueSizeInBits();
17176   if (DestSize % SrcSize != 0 ||
17177       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17178     return SDValue();
17179
17180   // Create a shuffle mask that will combine the extracted element with zeros
17181   // and undefs.
17182   int ZextRatio = DestSize / SrcSize;
17183   int NumMaskElts = NumBVOps * ZextRatio;
17184   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17185   for (int i = 0; i != NumMaskElts; ++i) {
17186     if (i / ZextRatio == ZextElt) {
17187       // The low bits of the (potentially translated) extracted element map to
17188       // the source vector. The high bits map to zero. We will use a zero vector
17189       // as the 2nd source operand of the shuffle, so use the 1st element of
17190       // that vector (mask value is number-of-elements) for the high bits.
17191       if (i % ZextRatio == 0)
17192         ShufMask[i] = Extract.getConstantOperandVal(1);
17193       else
17194         ShufMask[i] = NumMaskElts;
17195     }
17196
17197     // Undef elements of the build vector remain undef because we initialize
17198     // the shuffle mask with -1.
17199   }
17200
17201   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17202   // bitcast (shuffle V, ZeroVec, VectorMask)
17203   SDLoc DL(BV);
17204   EVT VecVT = Extract.getOperand(0).getValueType();
17205   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17206   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17207   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
17208                                              ZeroVec, ShufMask, DAG);
17209   if (!Shuf)
17210     return SDValue();
17211   return DAG.getBitcast(VT, Shuf);
17212 }
17213
17214 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17215 // operations. If the types of the vectors we're extracting from allow it,
17216 // turn this into a vector_shuffle node.
17217 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17218   SDLoc DL(N);
17219   EVT VT = N->getValueType(0);
17220
17221   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17222   if (!isTypeLegal(VT))
17223     return SDValue();
17224
17225   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17226     return V;
17227
17228   // May only combine to shuffle after legalize if shuffle is legal.
17229   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17230     return SDValue();
17231
17232   bool UsesZeroVector = false;
17233   unsigned NumElems = N->getNumOperands();
17234
17235   // Record, for each element of the newly built vector, which input vector
17236   // that element comes from. -1 stands for undef, 0 for the zero vector,
17237   // and positive values for the input vectors.
17238   // VectorMask maps each element to its vector number, and VecIn maps vector
17239   // numbers to their initial SDValues.
17240
17241   SmallVector<int, 8> VectorMask(NumElems, -1);
17242   SmallVector<SDValue, 8> VecIn;
17243   VecIn.push_back(SDValue());
17244
17245   for (unsigned i = 0; i != NumElems; ++i) {
17246     SDValue Op = N->getOperand(i);
17247
17248     if (Op.isUndef())
17249       continue;
17250
17251     // See if we can use a blend with a zero vector.
17252     // TODO: Should we generalize this to a blend with an arbitrary constant
17253     // vector?
17254     if (isNullConstant(Op) || isNullFPConstant(Op)) {
17255       UsesZeroVector = true;
17256       VectorMask[i] = 0;
17257       continue;
17258     }
17259
17260     // Not an undef or zero. If the input is something other than an
17261     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17262     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17263         !isa<ConstantSDNode>(Op.getOperand(1)))
17264       return SDValue();
17265     SDValue ExtractedFromVec = Op.getOperand(0);
17266
17267     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17268     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17269       return SDValue();
17270
17271     // All inputs must have the same element type as the output.
17272     if (VT.getVectorElementType() !=
17273         ExtractedFromVec.getValueType().getVectorElementType())
17274       return SDValue();
17275
17276     // Have we seen this input vector before?
17277     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17278     // a map back from SDValues to numbers isn't worth it.
17279     unsigned Idx = std::distance(
17280         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17281     if (Idx == VecIn.size())
17282       VecIn.push_back(ExtractedFromVec);
17283
17284     VectorMask[i] = Idx;
17285   }
17286
17287   // If we didn't find at least one input vector, bail out.
17288   if (VecIn.size() < 2)
17289     return SDValue();
17290
17291   // If all the Operands of BUILD_VECTOR extract from same
17292   // vector, then split the vector efficiently based on the maximum
17293   // vector access index and adjust the VectorMask and
17294   // VecIn accordingly.
17295   bool DidSplitVec = false;
17296   if (VecIn.size() == 2) {
17297     unsigned MaxIndex = 0;
17298     unsigned NearestPow2 = 0;
17299     SDValue Vec = VecIn.back();
17300     EVT InVT = Vec.getValueType();
17301     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17302     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17303
17304     for (unsigned i = 0; i < NumElems; i++) {
17305       if (VectorMask[i] <= 0)
17306         continue;
17307       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17308       IndexVec[i] = Index;
17309       MaxIndex = std::max(MaxIndex, Index);
17310     }
17311
17312     NearestPow2 = PowerOf2Ceil(MaxIndex);
17313     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17314         NumElems * 2 < NearestPow2) {
17315       unsigned SplitSize = NearestPow2 / 2;
17316       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17317                                      InVT.getVectorElementType(), SplitSize);
17318       if (TLI.isTypeLegal(SplitVT)) {
17319         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17320                                      DAG.getConstant(SplitSize, DL, IdxTy));
17321         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17322                                      DAG.getConstant(0, DL, IdxTy));
17323         VecIn.pop_back();
17324         VecIn.push_back(VecIn1);
17325         VecIn.push_back(VecIn2);
17326         DidSplitVec = true;
17327
17328         for (unsigned i = 0; i < NumElems; i++) {
17329           if (VectorMask[i] <= 0)
17330             continue;
17331           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17332         }
17333       }
17334     }
17335   }
17336
17337   // TODO: We want to sort the vectors by descending length, so that adjacent
17338   // pairs have similar length, and the longer vector is always first in the
17339   // pair.
17340
17341   // TODO: Should this fire if some of the input vectors has illegal type (like
17342   // it does now), or should we let legalization run its course first?
17343
17344   // Shuffle phase:
17345   // Take pairs of vectors, and shuffle them so that the result has elements
17346   // from these vectors in the correct places.
17347   // For example, given:
17348   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17349   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17350   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17351   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17352   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17353   // We will generate:
17354   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17355   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17356   SmallVector<SDValue, 4> Shuffles;
17357   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17358     unsigned LeftIdx = 2 * In + 1;
17359     SDValue VecLeft = VecIn[LeftIdx];
17360     SDValue VecRight =
17361         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17362
17363     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17364                                                 VecRight, LeftIdx, DidSplitVec))
17365       Shuffles.push_back(Shuffle);
17366     else
17367       return SDValue();
17368   }
17369
17370   // If we need the zero vector as an "ingredient" in the blend tree, add it
17371   // to the list of shuffles.
17372   if (UsesZeroVector)
17373     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17374                                       : DAG.getConstantFP(0.0, DL, VT));
17375
17376   // If we only have one shuffle, we're done.
17377   if (Shuffles.size() == 1)
17378     return Shuffles[0];
17379
17380   // Update the vector mask to point to the post-shuffle vectors.
17381   for (int &Vec : VectorMask)
17382     if (Vec == 0)
17383       Vec = Shuffles.size() - 1;
17384     else
17385       Vec = (Vec - 1) / 2;
17386
17387   // More than one shuffle. Generate a binary tree of blends, e.g. if from
17388   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17389   // generate:
17390   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17391   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17392   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17393   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17394   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17395   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17396   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17397
17398   // Make sure the initial size of the shuffle list is even.
17399   if (Shuffles.size() % 2)
17400     Shuffles.push_back(DAG.getUNDEF(VT));
17401
17402   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17403     if (CurSize % 2) {
17404       Shuffles[CurSize] = DAG.getUNDEF(VT);
17405       CurSize++;
17406     }
17407     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17408       int Left = 2 * In;
17409       int Right = 2 * In + 1;
17410       SmallVector<int, 8> Mask(NumElems, -1);
17411       for (unsigned i = 0; i != NumElems; ++i) {
17412         if (VectorMask[i] == Left) {
17413           Mask[i] = i;
17414           VectorMask[i] = In;
17415         } else if (VectorMask[i] == Right) {
17416           Mask[i] = i + NumElems;
17417           VectorMask[i] = In;
17418         }
17419       }
17420
17421       Shuffles[In] =
17422           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17423     }
17424   }
17425   return Shuffles[0];
17426 }
17427
17428 // Try to turn a build vector of zero extends of extract vector elts into a
17429 // a vector zero extend and possibly an extract subvector.
17430 // TODO: Support sign extend?
17431 // TODO: Allow undef elements?
17432 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17433   if (LegalOperations)
17434     return SDValue();
17435
17436   EVT VT = N->getValueType(0);
17437
17438   bool FoundZeroExtend = false;
17439   SDValue Op0 = N->getOperand(0);
17440   auto checkElem = [&](SDValue Op) -> int64_t {
17441     unsigned Opc = Op.getOpcode();
17442     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17443     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17444         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17445         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17446       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17447         return C->getZExtValue();
17448     return -1;
17449   };
17450
17451   // Make sure the first element matches
17452   // (zext (extract_vector_elt X, C))
17453   int64_t Offset = checkElem(Op0);
17454   if (Offset < 0)
17455     return SDValue();
17456
17457   unsigned NumElems = N->getNumOperands();
17458   SDValue In = Op0.getOperand(0).getOperand(0);
17459   EVT InSVT = In.getValueType().getScalarType();
17460   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17461
17462   // Don't create an illegal input type after type legalization.
17463   if (LegalTypes && !TLI.isTypeLegal(InVT))
17464     return SDValue();
17465
17466   // Ensure all the elements come from the same vector and are adjacent.
17467   for (unsigned i = 1; i != NumElems; ++i) {
17468     if ((Offset + i) != checkElem(N->getOperand(i)))
17469       return SDValue();
17470   }
17471
17472   SDLoc DL(N);
17473   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17474                    Op0.getOperand(0).getOperand(1));
17475   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17476                      VT, In);
17477 }
17478
17479 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17480   EVT VT = N->getValueType(0);
17481
17482   // A vector built entirely of undefs is undef.
17483   if (ISD::allOperandsUndef(N))
17484     return DAG.getUNDEF(VT);
17485
17486   // If this is a splat of a bitcast from another vector, change to a
17487   // concat_vector.
17488   // For example:
17489   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17490   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17491   //
17492   // If X is a build_vector itself, the concat can become a larger build_vector.
17493   // TODO: Maybe this is useful for non-splat too?
17494   if (!LegalOperations) {
17495     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17496       Splat = peekThroughBitcasts(Splat);
17497       EVT SrcVT = Splat.getValueType();
17498       if (SrcVT.isVector()) {
17499         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17500         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17501                                      SrcVT.getVectorElementType(), NumElts);
17502         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17503           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17504           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17505                                        NewVT, Ops);
17506           return DAG.getBitcast(VT, Concat);
17507         }
17508       }
17509     }
17510   }
17511
17512   // Check if we can express BUILD VECTOR via subvector extract.
17513   if (!LegalTypes && (N->getNumOperands() > 1)) {
17514     SDValue Op0 = N->getOperand(0);
17515     auto checkElem = [&](SDValue Op) -> uint64_t {
17516       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17517           (Op0.getOperand(0) == Op.getOperand(0)))
17518         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17519           return CNode->getZExtValue();
17520       return -1;
17521     };
17522
17523     int Offset = checkElem(Op0);
17524     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17525       if (Offset + i != checkElem(N->getOperand(i))) {
17526         Offset = -1;
17527         break;
17528       }
17529     }
17530
17531     if ((Offset == 0) &&
17532         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17533       return Op0.getOperand(0);
17534     if ((Offset != -1) &&
17535         ((Offset % N->getValueType(0).getVectorNumElements()) ==
17536          0)) // IDX must be multiple of output size.
17537       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17538                          Op0.getOperand(0), Op0.getOperand(1));
17539   }
17540
17541   if (SDValue V = convertBuildVecZextToZext(N))
17542     return V;
17543
17544   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17545     return V;
17546
17547   if (SDValue V = reduceBuildVecToShuffle(N))
17548     return V;
17549
17550   return SDValue();
17551 }
17552
17553 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17554   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17555   EVT OpVT = N->getOperand(0).getValueType();
17556
17557   // If the operands are legal vectors, leave them alone.
17558   if (TLI.isTypeLegal(OpVT))
17559     return SDValue();
17560
17561   SDLoc DL(N);
17562   EVT VT = N->getValueType(0);
17563   SmallVector<SDValue, 8> Ops;
17564
17565   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17566   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17567
17568   // Keep track of what we encounter.
17569   bool AnyInteger = false;
17570   bool AnyFP = false;
17571   for (const SDValue &Op : N->ops()) {
17572     if (ISD::BITCAST == Op.getOpcode() &&
17573         !Op.getOperand(0).getValueType().isVector())
17574       Ops.push_back(Op.getOperand(0));
17575     else if (ISD::UNDEF == Op.getOpcode())
17576       Ops.push_back(ScalarUndef);
17577     else
17578       return SDValue();
17579
17580     // Note whether we encounter an integer or floating point scalar.
17581     // If it's neither, bail out, it could be something weird like x86mmx.
17582     EVT LastOpVT = Ops.back().getValueType();
17583     if (LastOpVT.isFloatingPoint())
17584       AnyFP = true;
17585     else if (LastOpVT.isInteger())
17586       AnyInteger = true;
17587     else
17588       return SDValue();
17589   }
17590
17591   // If any of the operands is a floating point scalar bitcast to a vector,
17592   // use floating point types throughout, and bitcast everything.
17593   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17594   if (AnyFP) {
17595     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17596     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17597     if (AnyInteger) {
17598       for (SDValue &Op : Ops) {
17599         if (Op.getValueType() == SVT)
17600           continue;
17601         if (Op.isUndef())
17602           Op = ScalarUndef;
17603         else
17604           Op = DAG.getBitcast(SVT, Op);
17605       }
17606     }
17607   }
17608
17609   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17610                                VT.getSizeInBits() / SVT.getSizeInBits());
17611   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17612 }
17613
17614 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17615 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17616 // most two distinct vectors the same size as the result, attempt to turn this
17617 // into a legal shuffle.
17618 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17619   EVT VT = N->getValueType(0);
17620   EVT OpVT = N->getOperand(0).getValueType();
17621   int NumElts = VT.getVectorNumElements();
17622   int NumOpElts = OpVT.getVectorNumElements();
17623
17624   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17625   SmallVector<int, 8> Mask;
17626
17627   for (SDValue Op : N->ops()) {
17628     Op = peekThroughBitcasts(Op);
17629
17630     // UNDEF nodes convert to UNDEF shuffle mask values.
17631     if (Op.isUndef()) {
17632       Mask.append((unsigned)NumOpElts, -1);
17633       continue;
17634     }
17635
17636     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17637       return SDValue();
17638
17639     // What vector are we extracting the subvector from and at what index?
17640     SDValue ExtVec = Op.getOperand(0);
17641
17642     // We want the EVT of the original extraction to correctly scale the
17643     // extraction index.
17644     EVT ExtVT = ExtVec.getValueType();
17645     ExtVec = peekThroughBitcasts(ExtVec);
17646
17647     // UNDEF nodes convert to UNDEF shuffle mask values.
17648     if (ExtVec.isUndef()) {
17649       Mask.append((unsigned)NumOpElts, -1);
17650       continue;
17651     }
17652
17653     if (!isa<ConstantSDNode>(Op.getOperand(1)))
17654       return SDValue();
17655     int ExtIdx = Op.getConstantOperandVal(1);
17656
17657     // Ensure that we are extracting a subvector from a vector the same
17658     // size as the result.
17659     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17660       return SDValue();
17661
17662     // Scale the subvector index to account for any bitcast.
17663     int NumExtElts = ExtVT.getVectorNumElements();
17664     if (0 == (NumExtElts % NumElts))
17665       ExtIdx /= (NumExtElts / NumElts);
17666     else if (0 == (NumElts % NumExtElts))
17667       ExtIdx *= (NumElts / NumExtElts);
17668     else
17669       return SDValue();
17670
17671     // At most we can reference 2 inputs in the final shuffle.
17672     if (SV0.isUndef() || SV0 == ExtVec) {
17673       SV0 = ExtVec;
17674       for (int i = 0; i != NumOpElts; ++i)
17675         Mask.push_back(i + ExtIdx);
17676     } else if (SV1.isUndef() || SV1 == ExtVec) {
17677       SV1 = ExtVec;
17678       for (int i = 0; i != NumOpElts; ++i)
17679         Mask.push_back(i + ExtIdx + NumElts);
17680     } else {
17681       return SDValue();
17682     }
17683   }
17684
17685   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17686   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17687                                      DAG.getBitcast(VT, SV1), Mask, DAG);
17688 }
17689
17690 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17691   // If we only have one input vector, we don't need to do any concatenation.
17692   if (N->getNumOperands() == 1)
17693     return N->getOperand(0);
17694
17695   // Check if all of the operands are undefs.
17696   EVT VT = N->getValueType(0);
17697   if (ISD::allOperandsUndef(N))
17698     return DAG.getUNDEF(VT);
17699
17700   // Optimize concat_vectors where all but the first of the vectors are undef.
17701   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17702         return Op.isUndef();
17703       })) {
17704     SDValue In = N->getOperand(0);
17705     assert(In.getValueType().isVector() && "Must concat vectors");
17706
17707     // If the input is a concat_vectors, just make a larger concat by padding
17708     // with smaller undefs.
17709     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
17710       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
17711       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
17712       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
17713       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17714     }
17715
17716     SDValue Scalar = peekThroughOneUseBitcasts(In);
17717
17718     // concat_vectors(scalar_to_vector(scalar), undef) ->
17719     //     scalar_to_vector(scalar)
17720     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17721          Scalar.hasOneUse()) {
17722       EVT SVT = Scalar.getValueType().getVectorElementType();
17723       if (SVT == Scalar.getOperand(0).getValueType())
17724         Scalar = Scalar.getOperand(0);
17725     }
17726
17727     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17728     if (!Scalar.getValueType().isVector()) {
17729       // If the bitcast type isn't legal, it might be a trunc of a legal type;
17730       // look through the trunc so we can still do the transform:
17731       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17732       if (Scalar->getOpcode() == ISD::TRUNCATE &&
17733           !TLI.isTypeLegal(Scalar.getValueType()) &&
17734           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17735         Scalar = Scalar->getOperand(0);
17736
17737       EVT SclTy = Scalar.getValueType();
17738
17739       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17740         return SDValue();
17741
17742       // Bail out if the vector size is not a multiple of the scalar size.
17743       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17744         return SDValue();
17745
17746       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17747       if (VNTNumElms < 2)
17748         return SDValue();
17749
17750       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17751       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17752         return SDValue();
17753
17754       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17755       return DAG.getBitcast(VT, Res);
17756     }
17757   }
17758
17759   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17760   // We have already tested above for an UNDEF only concatenation.
17761   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17762   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17763   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17764     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17765   };
17766   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17767     SmallVector<SDValue, 8> Opnds;
17768     EVT SVT = VT.getScalarType();
17769
17770     EVT MinVT = SVT;
17771     if (!SVT.isFloatingPoint()) {
17772       // If BUILD_VECTOR are from built from integer, they may have different
17773       // operand types. Get the smallest type and truncate all operands to it.
17774       bool FoundMinVT = false;
17775       for (const SDValue &Op : N->ops())
17776         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17777           EVT OpSVT = Op.getOperand(0).getValueType();
17778           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17779           FoundMinVT = true;
17780         }
17781       assert(FoundMinVT && "Concat vector type mismatch");
17782     }
17783
17784     for (const SDValue &Op : N->ops()) {
17785       EVT OpVT = Op.getValueType();
17786       unsigned NumElts = OpVT.getVectorNumElements();
17787
17788       if (ISD::UNDEF == Op.getOpcode())
17789         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17790
17791       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17792         if (SVT.isFloatingPoint()) {
17793           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17794           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17795         } else {
17796           for (unsigned i = 0; i != NumElts; ++i)
17797             Opnds.push_back(
17798                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17799         }
17800       }
17801     }
17802
17803     assert(VT.getVectorNumElements() == Opnds.size() &&
17804            "Concat vector type mismatch");
17805     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17806   }
17807
17808   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17809   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17810     return V;
17811
17812   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17813   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17814     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17815       return V;
17816
17817   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17818   // nodes often generate nop CONCAT_VECTOR nodes.
17819   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17820   // place the incoming vectors at the exact same location.
17821   SDValue SingleSource = SDValue();
17822   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17823
17824   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17825     SDValue Op = N->getOperand(i);
17826
17827     if (Op.isUndef())
17828       continue;
17829
17830     // Check if this is the identity extract:
17831     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17832       return SDValue();
17833
17834     // Find the single incoming vector for the extract_subvector.
17835     if (SingleSource.getNode()) {
17836       if (Op.getOperand(0) != SingleSource)
17837         return SDValue();
17838     } else {
17839       SingleSource = Op.getOperand(0);
17840
17841       // Check the source type is the same as the type of the result.
17842       // If not, this concat may extend the vector, so we can not
17843       // optimize it away.
17844       if (SingleSource.getValueType() != N->getValueType(0))
17845         return SDValue();
17846     }
17847
17848     auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17849     // The extract index must be constant.
17850     if (!CS)
17851       return SDValue();
17852
17853     // Check that we are reading from the identity index.
17854     unsigned IdentityIndex = i * PartNumElem;
17855     if (CS->getAPIntValue() != IdentityIndex)
17856       return SDValue();
17857   }
17858
17859   if (SingleSource.getNode())
17860     return SingleSource;
17861
17862   return SDValue();
17863 }
17864
17865 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
17866 // if the subvector can be sourced for free.
17867 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
17868   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
17869       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
17870     return V.getOperand(1);
17871   }
17872   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17873   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
17874       V.getOperand(0).getValueType() == SubVT &&
17875       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
17876     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
17877     return V.getOperand(SubIdx);
17878   }
17879   return SDValue();
17880 }
17881
17882 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
17883                                               SelectionDAG &DAG) {
17884   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17885   SDValue BinOp = Extract->getOperand(0);
17886   unsigned BinOpcode = BinOp.getOpcode();
17887   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
17888     return SDValue();
17889
17890   EVT VecVT = BinOp.getValueType();
17891   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
17892   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
17893     return SDValue();
17894
17895   SDValue Index = Extract->getOperand(1);
17896   EVT SubVT = Extract->getValueType(0);
17897   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
17898     return SDValue();
17899
17900   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
17901   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
17902
17903   // TODO: We could handle the case where only 1 operand is being inserted by
17904   //       creating an extract of the other operand, but that requires checking
17905   //       number of uses and/or costs.
17906   if (!Sub0 || !Sub1)
17907     return SDValue();
17908
17909   // We are inserting both operands of the wide binop only to extract back
17910   // to the narrow vector size. Eliminate all of the insert/extract:
17911   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
17912   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
17913                      BinOp->getFlags());
17914 }
17915
17916 /// If we are extracting a subvector produced by a wide binary operator try
17917 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17918 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17919   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17920   // some of these bailouts with other transforms.
17921
17922   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
17923     return V;
17924
17925   // The extract index must be a constant, so we can map it to a concat operand.
17926   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17927   if (!ExtractIndexC)
17928     return SDValue();
17929
17930   // We are looking for an optionally bitcasted wide vector binary operator
17931   // feeding an extract subvector.
17932   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17933   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17934   unsigned BOpcode = BinOp.getOpcode();
17935   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
17936     return SDValue();
17937
17938   // The binop must be a vector type, so we can extract some fraction of it.
17939   EVT WideBVT = BinOp.getValueType();
17940   if (!WideBVT.isVector())
17941     return SDValue();
17942
17943   EVT VT = Extract->getValueType(0);
17944   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17945   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17946          "Extract index is not a multiple of the vector length.");
17947
17948   // Bail out if this is not a proper multiple width extraction.
17949   unsigned WideWidth = WideBVT.getSizeInBits();
17950   unsigned NarrowWidth = VT.getSizeInBits();
17951   if (WideWidth % NarrowWidth != 0)
17952     return SDValue();
17953
17954   // Bail out if we are extracting a fraction of a single operation. This can
17955   // occur because we potentially looked through a bitcast of the binop.
17956   unsigned NarrowingRatio = WideWidth / NarrowWidth;
17957   unsigned WideNumElts = WideBVT.getVectorNumElements();
17958   if (WideNumElts % NarrowingRatio != 0)
17959     return SDValue();
17960
17961   // Bail out if the target does not support a narrower version of the binop.
17962   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17963                                    WideNumElts / NarrowingRatio);
17964   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17965     return SDValue();
17966
17967   // If extraction is cheap, we don't need to look at the binop operands
17968   // for concat ops. The narrow binop alone makes this transform profitable.
17969   // We can't just reuse the original extract index operand because we may have
17970   // bitcasted.
17971   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17972   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17973   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17974   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17975       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17976     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17977     SDLoc DL(Extract);
17978     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17979     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17980                             BinOp.getOperand(0), NewExtIndex);
17981     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17982                             BinOp.getOperand(1), NewExtIndex);
17983     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17984                                       BinOp.getNode()->getFlags());
17985     return DAG.getBitcast(VT, NarrowBinOp);
17986   }
17987
17988   // Only handle the case where we are doubling and then halving. A larger ratio
17989   // may require more than two narrow binops to replace the wide binop.
17990   if (NarrowingRatio != 2)
17991     return SDValue();
17992
17993   // TODO: The motivating case for this transform is an x86 AVX1 target. That
17994   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17995   // flavors, but no other 256-bit integer support. This could be extended to
17996   // handle any binop, but that may require fixing/adding other folds to avoid
17997   // codegen regressions.
17998   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17999     return SDValue();
18000
18001   // We need at least one concatenation operation of a binop operand to make
18002   // this transform worthwhile. The concat must double the input vector sizes.
18003   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18004     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18005       return V.getOperand(ConcatOpNum);
18006     return SDValue();
18007   };
18008   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18009   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18010
18011   if (SubVecL || SubVecR) {
18012     // If a binop operand was not the result of a concat, we must extract a
18013     // half-sized operand for our new narrow binop:
18014     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18015     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18016     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18017     SDLoc DL(Extract);
18018     SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18019     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18020                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18021                                       BinOp.getOperand(0), IndexC);
18022
18023     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18024                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18025                                       BinOp.getOperand(1), IndexC);
18026
18027     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18028     return DAG.getBitcast(VT, NarrowBinOp);
18029   }
18030
18031   return SDValue();
18032 }
18033
18034 /// If we are extracting a subvector from a wide vector load, convert to a
18035 /// narrow load to eliminate the extraction:
18036 /// (extract_subvector (load wide vector)) --> (load narrow vector)
18037 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18038   // TODO: Add support for big-endian. The offset calculation must be adjusted.
18039   if (DAG.getDataLayout().isBigEndian())
18040     return SDValue();
18041
18042   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18043   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18044   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
18045       !ExtIdx)
18046     return SDValue();
18047
18048   // Allow targets to opt-out.
18049   EVT VT = Extract->getValueType(0);
18050   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18051   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18052     return SDValue();
18053
18054   // The narrow load will be offset from the base address of the old load if
18055   // we are extracting from something besides index 0 (little-endian).
18056   SDLoc DL(Extract);
18057   SDValue BaseAddr = Ld->getOperand(1);
18058   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18059
18060   // TODO: Use "BaseIndexOffset" to make this more effective.
18061   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18062   MachineFunction &MF = DAG.getMachineFunction();
18063   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18064                                                    VT.getStoreSize());
18065   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18066   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18067   return NewLd;
18068 }
18069
18070 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18071   EVT NVT = N->getValueType(0);
18072   SDValue V = N->getOperand(0);
18073
18074   // Extract from UNDEF is UNDEF.
18075   if (V.isUndef())
18076     return DAG.getUNDEF(NVT);
18077
18078   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18079     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18080       return NarrowLoad;
18081
18082   // Combine an extract of an extract into a single extract_subvector.
18083   // ext (ext X, C), 0 --> ext X, C
18084   SDValue Index = N->getOperand(1);
18085   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18086       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18087     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18088                                     V.getConstantOperandVal(1)) &&
18089         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18090       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18091                          V.getOperand(1));
18092     }
18093   }
18094
18095   // Try to move vector bitcast after extract_subv by scaling extraction index:
18096   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18097   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18098       V.getOperand(0).getValueType().isVector()) {
18099     SDValue SrcOp = V.getOperand(0);
18100     EVT SrcVT = SrcOp.getValueType();
18101     unsigned SrcNumElts = SrcVT.getVectorNumElements();
18102     unsigned DestNumElts = V.getValueType().getVectorNumElements();
18103     if ((SrcNumElts % DestNumElts) == 0) {
18104       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18105       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18106       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18107                                       NewExtNumElts);
18108       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18109         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18110         SDLoc DL(N);
18111         SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
18112         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18113                                          V.getOperand(0), NewIndex);
18114         return DAG.getBitcast(NVT, NewExtract);
18115       }
18116     }
18117     // TODO - handle (DestNumElts % SrcNumElts) == 0
18118   }
18119
18120   // Combine:
18121   //    (extract_subvec (concat V1, V2, ...), i)
18122   // Into:
18123   //    Vi if possible
18124   // Only operand 0 is checked as 'concat' assumes all inputs of the same
18125   // type.
18126   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
18127       V.getOperand(0).getValueType() == NVT) {
18128     unsigned Idx = N->getConstantOperandVal(1);
18129     unsigned NumElems = NVT.getVectorNumElements();
18130     assert((Idx % NumElems) == 0 &&
18131            "IDX in concat is not a multiple of the result vector length.");
18132     return V->getOperand(Idx / NumElems);
18133   }
18134
18135   V = peekThroughBitcasts(V);
18136
18137   // If the input is a build vector. Try to make a smaller build vector.
18138   if (V.getOpcode() == ISD::BUILD_VECTOR) {
18139     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18140       EVT InVT = V.getValueType();
18141       unsigned ExtractSize = NVT.getSizeInBits();
18142       unsigned EltSize = InVT.getScalarSizeInBits();
18143       // Only do this if we won't split any elements.
18144       if (ExtractSize % EltSize == 0) {
18145         unsigned NumElems = ExtractSize / EltSize;
18146         EVT EltVT = InVT.getVectorElementType();
18147         EVT ExtractVT = NumElems == 1 ? EltVT
18148                                       : EVT::getVectorVT(*DAG.getContext(),
18149                                                          EltVT, NumElems);
18150         if ((Level < AfterLegalizeDAG ||
18151              (NumElems == 1 ||
18152               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18153             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18154           unsigned IdxVal = IdxC->getZExtValue();
18155           IdxVal *= NVT.getScalarSizeInBits();
18156           IdxVal /= EltSize;
18157
18158           if (NumElems == 1) {
18159             SDValue Src = V->getOperand(IdxVal);
18160             if (EltVT != Src.getValueType())
18161               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18162             return DAG.getBitcast(NVT, Src);
18163           }
18164
18165           // Extract the pieces from the original build_vector.
18166           SDValue BuildVec = DAG.getBuildVector(
18167               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18168           return DAG.getBitcast(NVT, BuildVec);
18169         }
18170       }
18171     }
18172   }
18173
18174   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18175     // Handle only simple case where vector being inserted and vector
18176     // being extracted are of same size.
18177     EVT SmallVT = V.getOperand(1).getValueType();
18178     if (!NVT.bitsEq(SmallVT))
18179       return SDValue();
18180
18181     // Only handle cases where both indexes are constants.
18182     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18183     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18184     if (InsIdx && ExtIdx) {
18185       // Combine:
18186       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18187       // Into:
18188       //    indices are equal or bit offsets are equal => V1
18189       //    otherwise => (extract_subvec V1, ExtIdx)
18190       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18191           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18192         return DAG.getBitcast(NVT, V.getOperand(1));
18193       return DAG.getNode(
18194           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18195           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18196           Index);
18197     }
18198   }
18199
18200   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18201     return NarrowBOp;
18202
18203   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18204     return SDValue(N, 0);
18205
18206   return SDValue();
18207 }
18208
18209 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18210 /// followed by concatenation. Narrow vector ops may have better performance
18211 /// than wide ops, and this can unlock further narrowing of other vector ops.
18212 /// Targets can invert this transform later if it is not profitable.
18213 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18214                                          SelectionDAG &DAG) {
18215   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18216   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18217       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18218       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18219     return SDValue();
18220
18221   // Split the wide shuffle mask into halves. Any mask element that is accessing
18222   // operand 1 is offset down to account for narrowing of the vectors.
18223   ArrayRef<int> Mask = Shuf->getMask();
18224   EVT VT = Shuf->getValueType(0);
18225   unsigned NumElts = VT.getVectorNumElements();
18226   unsigned HalfNumElts = NumElts / 2;
18227   SmallVector<int, 16> Mask0(HalfNumElts, -1);
18228   SmallVector<int, 16> Mask1(HalfNumElts, -1);
18229   for (unsigned i = 0; i != NumElts; ++i) {
18230     if (Mask[i] == -1)
18231       continue;
18232     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18233     if (i < HalfNumElts)
18234       Mask0[i] = M;
18235     else
18236       Mask1[i - HalfNumElts] = M;
18237   }
18238
18239   // Ask the target if this is a valid transform.
18240   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18241   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18242                                 HalfNumElts);
18243   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18244       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18245     return SDValue();
18246
18247   // shuffle (concat X, undef), (concat Y, undef), Mask -->
18248   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18249   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18250   SDLoc DL(Shuf);
18251   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18252   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18253   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18254 }
18255
18256 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18257 // or turn a shuffle of a single concat into simpler shuffle then concat.
18258 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
18259   EVT VT = N->getValueType(0);
18260   unsigned NumElts = VT.getVectorNumElements();
18261
18262   SDValue N0 = N->getOperand(0);
18263   SDValue N1 = N->getOperand(1);
18264   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18265   ArrayRef<int> Mask = SVN->getMask();
18266
18267   SmallVector<SDValue, 4> Ops;
18268   EVT ConcatVT = N0.getOperand(0).getValueType();
18269   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
18270   unsigned NumConcats = NumElts / NumElemsPerConcat;
18271
18272   auto IsUndefMaskElt = [](int i) { return i == -1; };
18273
18274   // Special case: shuffle(concat(A,B)) can be more efficiently represented
18275   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
18276   // half vector elements.
18277   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
18278       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
18279                    IsUndefMaskElt)) {
18280     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
18281                               N0.getOperand(1),
18282                               Mask.slice(0, NumElemsPerConcat));
18283     N1 = DAG.getUNDEF(ConcatVT);
18284     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
18285   }
18286
18287   // Look at every vector that's inserted. We're looking for exact
18288   // subvector-sized copies from a concatenated vector
18289   for (unsigned I = 0; I != NumConcats; ++I) {
18290     unsigned Begin = I * NumElemsPerConcat;
18291     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
18292
18293     // Make sure we're dealing with a copy.
18294     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
18295       Ops.push_back(DAG.getUNDEF(ConcatVT));
18296       continue;
18297     }
18298
18299     int OpIdx = -1;
18300     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
18301       if (IsUndefMaskElt(SubMask[i]))
18302         continue;
18303       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
18304         return SDValue();
18305       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
18306       if (0 <= OpIdx && EltOpIdx != OpIdx)
18307         return SDValue();
18308       OpIdx = EltOpIdx;
18309     }
18310     assert(0 <= OpIdx && "Unknown concat_vectors op");
18311
18312     if (OpIdx < (int)N0.getNumOperands())
18313       Ops.push_back(N0.getOperand(OpIdx));
18314     else
18315       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
18316   }
18317
18318   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18319 }
18320
18321 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18322 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18323 //
18324 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18325 // a simplification in some sense, but it isn't appropriate in general: some
18326 // BUILD_VECTORs are substantially cheaper than others. The general case
18327 // of a BUILD_VECTOR requires inserting each element individually (or
18328 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18329 // all constants is a single constant pool load.  A BUILD_VECTOR where each
18330 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
18331 // are undef lowers to a small number of element insertions.
18332 //
18333 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18334 // We don't fold shuffles where one side is a non-zero constant, and we don't
18335 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18336 // non-constant operands. This seems to work out reasonably well in practice.
18337 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
18338                                        SelectionDAG &DAG,
18339                                        const TargetLowering &TLI) {
18340   EVT VT = SVN->getValueType(0);
18341   unsigned NumElts = VT.getVectorNumElements();
18342   SDValue N0 = SVN->getOperand(0);
18343   SDValue N1 = SVN->getOperand(1);
18344
18345   if (!N0->hasOneUse())
18346     return SDValue();
18347
18348   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
18349   // discussed above.
18350   if (!N1.isUndef()) {
18351     if (!N1->hasOneUse())
18352       return SDValue();
18353
18354     bool N0AnyConst = isAnyConstantBuildVector(N0);
18355     bool N1AnyConst = isAnyConstantBuildVector(N1);
18356     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
18357       return SDValue();
18358     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
18359       return SDValue();
18360   }
18361
18362   // If both inputs are splats of the same value then we can safely merge this
18363   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
18364   bool IsSplat = false;
18365   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
18366   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
18367   if (BV0 && BV1)
18368     if (SDValue Splat0 = BV0->getSplatValue())
18369       IsSplat = (Splat0 == BV1->getSplatValue());
18370
18371   SmallVector<SDValue, 8> Ops;
18372   SmallSet<SDValue, 16> DuplicateOps;
18373   for (int M : SVN->getMask()) {
18374     SDValue Op = DAG.getUNDEF(VT.getScalarType());
18375     if (M >= 0) {
18376       int Idx = M < (int)NumElts ? M : M - NumElts;
18377       SDValue &S = (M < (int)NumElts ? N0 : N1);
18378       if (S.getOpcode() == ISD::BUILD_VECTOR) {
18379         Op = S.getOperand(Idx);
18380       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18381         SDValue Op0 = S.getOperand(0);
18382         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
18383       } else {
18384         // Operand can't be combined - bail out.
18385         return SDValue();
18386       }
18387     }
18388
18389     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18390     // generating a splat; semantically, this is fine, but it's likely to
18391     // generate low-quality code if the target can't reconstruct an appropriate
18392     // shuffle.
18393     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18394       if (!IsSplat && !DuplicateOps.insert(Op).second)
18395         return SDValue();
18396
18397     Ops.push_back(Op);
18398   }
18399
18400   // BUILD_VECTOR requires all inputs to be of the same type, find the
18401   // maximum type and extend them all.
18402   EVT SVT = VT.getScalarType();
18403   if (SVT.isInteger())
18404     for (SDValue &Op : Ops)
18405       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18406   if (SVT != VT.getScalarType())
18407     for (SDValue &Op : Ops)
18408       Op = TLI.isZExtFree(Op.getValueType(), SVT)
18409                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18410                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18411   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18412 }
18413
18414 // Match shuffles that can be converted to any_vector_extend_in_reg.
18415 // This is often generated during legalization.
18416 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18417 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18418 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
18419                                             SelectionDAG &DAG,
18420                                             const TargetLowering &TLI,
18421                                             bool LegalOperations) {
18422   EVT VT = SVN->getValueType(0);
18423   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18424
18425   // TODO Add support for big-endian when we have a test case.
18426   if (!VT.isInteger() || IsBigEndian)
18427     return SDValue();
18428
18429   unsigned NumElts = VT.getVectorNumElements();
18430   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18431   ArrayRef<int> Mask = SVN->getMask();
18432   SDValue N0 = SVN->getOperand(0);
18433
18434   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18435   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18436     for (unsigned i = 0; i != NumElts; ++i) {
18437       if (Mask[i] < 0)
18438         continue;
18439       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18440         continue;
18441       return false;
18442     }
18443     return true;
18444   };
18445
18446   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
18447   // power-of-2 extensions as they are the most likely.
18448   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
18449     // Check for non power of 2 vector sizes
18450     if (NumElts % Scale != 0)
18451       continue;
18452     if (!isAnyExtend(Scale))
18453       continue;
18454
18455     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18456     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18457     // Never create an illegal type. Only create unsupported operations if we
18458     // are pre-legalization.
18459     if (TLI.isTypeLegal(OutVT))
18460       if (!LegalOperations ||
18461           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
18462         return DAG.getBitcast(VT,
18463                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
18464                                           SDLoc(SVN), OutVT, N0));
18465   }
18466
18467   return SDValue();
18468 }
18469
18470 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18471 // each source element of a large type into the lowest elements of a smaller
18472 // destination type. This is often generated during legalization.
18473 // If the source node itself was a '*_extend_vector_inreg' node then we should
18474 // then be able to remove it.
18475 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
18476                                         SelectionDAG &DAG) {
18477   EVT VT = SVN->getValueType(0);
18478   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18479
18480   // TODO Add support for big-endian when we have a test case.
18481   if (!VT.isInteger() || IsBigEndian)
18482     return SDValue();
18483
18484   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
18485
18486   unsigned Opcode = N0.getOpcode();
18487   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18488       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
18489       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
18490     return SDValue();
18491
18492   SDValue N00 = N0.getOperand(0);
18493   ArrayRef<int> Mask = SVN->getMask();
18494   unsigned NumElts = VT.getVectorNumElements();
18495   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18496   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18497   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18498
18499   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18500     return SDValue();
18501   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18502
18503   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
18504   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18505   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18506   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18507     for (unsigned i = 0; i != NumElts; ++i) {
18508       if (Mask[i] < 0)
18509         continue;
18510       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18511         continue;
18512       return false;
18513     }
18514     return true;
18515   };
18516
18517   // At the moment we just handle the case where we've truncated back to the
18518   // same size as before the extension.
18519   // TODO: handle more extension/truncation cases as cases arise.
18520   if (EltSizeInBits != ExtSrcSizeInBits)
18521     return SDValue();
18522
18523   // We can remove *extend_vector_inreg only if the truncation happens at
18524   // the same scale as the extension.
18525   if (isTruncate(ExtScale))
18526     return DAG.getBitcast(VT, N00);
18527
18528   return SDValue();
18529 }
18530
18531 // Combine shuffles of splat-shuffles of the form:
18532 // shuffle (shuffle V, undef, splat-mask), undef, M
18533 // If splat-mask contains undef elements, we need to be careful about
18534 // introducing undef's in the folded mask which are not the result of composing
18535 // the masks of the shuffles.
18536 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18537                                         SelectionDAG &DAG) {
18538   if (!Shuf->getOperand(1).isUndef())
18539     return SDValue();
18540   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18541   if (!Splat || !Splat->isSplat())
18542     return SDValue();
18543
18544   ArrayRef<int> ShufMask = Shuf->getMask();
18545   ArrayRef<int> SplatMask = Splat->getMask();
18546   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18547
18548   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18549   // every undef mask element in the splat-shuffle has a corresponding undef
18550   // element in the user-shuffle's mask or if the composition of mask elements
18551   // would result in undef.
18552   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18553   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18554   //   In this case it is not legal to simplify to the splat-shuffle because we
18555   //   may be exposing the users of the shuffle an undef element at index 1
18556   //   which was not there before the combine.
18557   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18558   //   In this case the composition of masks yields SplatMask, so it's ok to
18559   //   simplify to the splat-shuffle.
18560   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18561   //   In this case the composed mask includes all undef elements of SplatMask
18562   //   and in addition sets element zero to undef. It is safe to simplify to
18563   //   the splat-shuffle.
18564   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18565                                        ArrayRef<int> SplatMask) {
18566     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18567       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18568           SplatMask[UserMask[i]] != -1)
18569         return false;
18570     return true;
18571   };
18572   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18573     return Shuf->getOperand(0);
18574
18575   // Create a new shuffle with a mask that is composed of the two shuffles'
18576   // masks.
18577   SmallVector<int, 32> NewMask;
18578   for (int Idx : ShufMask)
18579     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18580
18581   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18582                               Splat->getOperand(0), Splat->getOperand(1),
18583                               NewMask);
18584 }
18585
18586 /// If the shuffle mask is taking exactly one element from the first vector
18587 /// operand and passing through all other elements from the second vector
18588 /// operand, return the index of the mask element that is choosing an element
18589 /// from the first operand. Otherwise, return -1.
18590 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18591   int MaskSize = Mask.size();
18592   int EltFromOp0 = -1;
18593   // TODO: This does not match if there are undef elements in the shuffle mask.
18594   // Should we ignore undefs in the shuffle mask instead? The trade-off is
18595   // removing an instruction (a shuffle), but losing the knowledge that some
18596   // vector lanes are not needed.
18597   for (int i = 0; i != MaskSize; ++i) {
18598     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18599       // We're looking for a shuffle of exactly one element from operand 0.
18600       if (EltFromOp0 != -1)
18601         return -1;
18602       EltFromOp0 = i;
18603     } else if (Mask[i] != i + MaskSize) {
18604       // Nothing from operand 1 can change lanes.
18605       return -1;
18606     }
18607   }
18608   return EltFromOp0;
18609 }
18610
18611 /// If a shuffle inserts exactly one element from a source vector operand into
18612 /// another vector operand and we can access the specified element as a scalar,
18613 /// then we can eliminate the shuffle.
18614 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18615                                       SelectionDAG &DAG) {
18616   // First, check if we are taking one element of a vector and shuffling that
18617   // element into another vector.
18618   ArrayRef<int> Mask = Shuf->getMask();
18619   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18620   SDValue Op0 = Shuf->getOperand(0);
18621   SDValue Op1 = Shuf->getOperand(1);
18622   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18623   if (ShufOp0Index == -1) {
18624     // Commute mask and check again.
18625     ShuffleVectorSDNode::commuteMask(CommutedMask);
18626     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18627     if (ShufOp0Index == -1)
18628       return SDValue();
18629     // Commute operands to match the commuted shuffle mask.
18630     std::swap(Op0, Op1);
18631     Mask = CommutedMask;
18632   }
18633
18634   // The shuffle inserts exactly one element from operand 0 into operand 1.
18635   // Now see if we can access that element as a scalar via a real insert element
18636   // instruction.
18637   // TODO: We can try harder to locate the element as a scalar. Examples: it
18638   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18639   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18640          "Shuffle mask value must be from operand 0");
18641   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18642     return SDValue();
18643
18644   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18645   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18646     return SDValue();
18647
18648   // There's an existing insertelement with constant insertion index, so we
18649   // don't need to check the legality/profitability of a replacement operation
18650   // that differs at most in the constant value. The target should be able to
18651   // lower any of those in a similar way. If not, legalization will expand this
18652   // to a scalar-to-vector plus shuffle.
18653   //
18654   // Note that the shuffle may move the scalar from the position that the insert
18655   // element used. Therefore, our new insert element occurs at the shuffle's
18656   // mask index value, not the insert's index value.
18657   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
18658   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18659                                         Op0.getOperand(2).getValueType());
18660   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18661                      Op1, Op0.getOperand(1), NewInsIndex);
18662 }
18663
18664 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18665 /// completely. This has the potential to lose undef knowledge because the first
18666 /// shuffle may not have an undef mask element where the second one does. So
18667 /// only call this after doing simplifications based on demanded elements.
18668 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18669   // shuf (shuf0 X, Y, Mask0), undef, Mask
18670   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18671   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18672     return SDValue();
18673
18674   ArrayRef<int> Mask = Shuf->getMask();
18675   ArrayRef<int> Mask0 = Shuf0->getMask();
18676   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18677     // Ignore undef elements.
18678     if (Mask[i] == -1)
18679       continue;
18680     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18681
18682     // Is the element of the shuffle operand chosen by this shuffle the same as
18683     // the element chosen by the shuffle operand itself?
18684     if (Mask0[Mask[i]] != Mask0[i])
18685       return SDValue();
18686   }
18687   // Every element of this shuffle is identical to the result of the previous
18688   // shuffle, so we can replace this value.
18689   return Shuf->getOperand(0);
18690 }
18691
18692 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18693   EVT VT = N->getValueType(0);
18694   unsigned NumElts = VT.getVectorNumElements();
18695
18696   SDValue N0 = N->getOperand(0);
18697   SDValue N1 = N->getOperand(1);
18698
18699   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18700
18701   // Canonicalize shuffle undef, undef -> undef
18702   if (N0.isUndef() && N1.isUndef())
18703     return DAG.getUNDEF(VT);
18704
18705   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18706
18707   // Canonicalize shuffle v, v -> v, undef
18708   if (N0 == N1) {
18709     SmallVector<int, 8> NewMask;
18710     for (unsigned i = 0; i != NumElts; ++i) {
18711       int Idx = SVN->getMaskElt(i);
18712       if (Idx >= (int)NumElts) Idx -= NumElts;
18713       NewMask.push_back(Idx);
18714     }
18715     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18716   }
18717
18718   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18719   if (N0.isUndef())
18720     return DAG.getCommutedVectorShuffle(*SVN);
18721
18722   // Remove references to rhs if it is undef
18723   if (N1.isUndef()) {
18724     bool Changed = false;
18725     SmallVector<int, 8> NewMask;
18726     for (unsigned i = 0; i != NumElts; ++i) {
18727       int Idx = SVN->getMaskElt(i);
18728       if (Idx >= (int)NumElts) {
18729         Idx = -1;
18730         Changed = true;
18731       }
18732       NewMask.push_back(Idx);
18733     }
18734     if (Changed)
18735       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18736   }
18737
18738   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18739     return InsElt;
18740
18741   // A shuffle of a single vector that is a splatted value can always be folded.
18742   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18743     return V;
18744
18745   // If it is a splat, check if the argument vector is another splat or a
18746   // build_vector.
18747   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18748     int SplatIndex = SVN->getSplatIndex();
18749     if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18750         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18751       // splat (vector_bo L, R), Index -->
18752       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18753       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18754       SDLoc DL(N);
18755       EVT EltVT = VT.getScalarType();
18756       SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18757       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18758       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18759       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18760                                   N0.getNode()->getFlags());
18761       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18762       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18763       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18764     }
18765
18766     // If this is a bit convert that changes the element type of the vector but
18767     // not the number of vector elements, look through it.  Be careful not to
18768     // look though conversions that change things like v4f32 to v2f64.
18769     SDNode *V = N0.getNode();
18770     if (V->getOpcode() == ISD::BITCAST) {
18771       SDValue ConvInput = V->getOperand(0);
18772       if (ConvInput.getValueType().isVector() &&
18773           ConvInput.getValueType().getVectorNumElements() == NumElts)
18774         V = ConvInput.getNode();
18775     }
18776
18777     if (V->getOpcode() == ISD::BUILD_VECTOR) {
18778       assert(V->getNumOperands() == NumElts &&
18779              "BUILD_VECTOR has wrong number of operands");
18780       SDValue Base;
18781       bool AllSame = true;
18782       for (unsigned i = 0; i != NumElts; ++i) {
18783         if (!V->getOperand(i).isUndef()) {
18784           Base = V->getOperand(i);
18785           break;
18786         }
18787       }
18788       // Splat of <u, u, u, u>, return <u, u, u, u>
18789       if (!Base.getNode())
18790         return N0;
18791       for (unsigned i = 0; i != NumElts; ++i) {
18792         if (V->getOperand(i) != Base) {
18793           AllSame = false;
18794           break;
18795         }
18796       }
18797       // Splat of <x, x, x, x>, return <x, x, x, x>
18798       if (AllSame)
18799         return N0;
18800
18801       // Canonicalize any other splat as a build_vector.
18802       SDValue Splatted = V->getOperand(SplatIndex);
18803       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18804       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18805
18806       // We may have jumped through bitcasts, so the type of the
18807       // BUILD_VECTOR may not match the type of the shuffle.
18808       if (V->getValueType(0) != VT)
18809         NewBV = DAG.getBitcast(VT, NewBV);
18810       return NewBV;
18811     }
18812   }
18813
18814   // Simplify source operands based on shuffle mask.
18815   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18816     return SDValue(N, 0);
18817
18818   // This is intentionally placed after demanded elements simplification because
18819   // it could eliminate knowledge of undef elements created by this shuffle.
18820   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18821     return ShufOp;
18822
18823   // Match shuffles that can be converted to any_vector_extend_in_reg.
18824   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18825     return V;
18826
18827   // Combine "truncate_vector_in_reg" style shuffles.
18828   if (SDValue V = combineTruncationShuffle(SVN, DAG))
18829     return V;
18830
18831   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18832       Level < AfterLegalizeVectorOps &&
18833       (N1.isUndef() ||
18834       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18835        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18836     if (SDValue V = partitionShuffleOfConcats(N, DAG))
18837       return V;
18838   }
18839
18840   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18841   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18842   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18843     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18844       return Res;
18845
18846   // If this shuffle only has a single input that is a bitcasted shuffle,
18847   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18848   // back to their original types.
18849   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18850       N1.isUndef() && Level < AfterLegalizeVectorOps &&
18851       TLI.isTypeLegal(VT)) {
18852     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18853       if (Scale == 1)
18854         return SmallVector<int, 8>(Mask.begin(), Mask.end());
18855
18856       SmallVector<int, 8> NewMask;
18857       for (int M : Mask)
18858         for (int s = 0; s != Scale; ++s)
18859           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18860       return NewMask;
18861     };
18862
18863     SDValue BC0 = peekThroughOneUseBitcasts(N0);
18864     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18865       EVT SVT = VT.getScalarType();
18866       EVT InnerVT = BC0->getValueType(0);
18867       EVT InnerSVT = InnerVT.getScalarType();
18868
18869       // Determine which shuffle works with the smaller scalar type.
18870       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
18871       EVT ScaleSVT = ScaleVT.getScalarType();
18872
18873       if (TLI.isTypeLegal(ScaleVT) &&
18874           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
18875           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
18876         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18877         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18878
18879         // Scale the shuffle masks to the smaller scalar type.
18880         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
18881         SmallVector<int, 8> InnerMask =
18882             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
18883         SmallVector<int, 8> OuterMask =
18884             ScaleShuffleMask(SVN->getMask(), OuterScale);
18885
18886         // Merge the shuffle masks.
18887         SmallVector<int, 8> NewMask;
18888         for (int M : OuterMask)
18889           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
18890
18891         // Test for shuffle mask legality over both commutations.
18892         SDValue SV0 = BC0->getOperand(0);
18893         SDValue SV1 = BC0->getOperand(1);
18894         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18895         if (!LegalMask) {
18896           std::swap(SV0, SV1);
18897           ShuffleVectorSDNode::commuteMask(NewMask);
18898           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18899         }
18900
18901         if (LegalMask) {
18902           SV0 = DAG.getBitcast(ScaleVT, SV0);
18903           SV1 = DAG.getBitcast(ScaleVT, SV1);
18904           return DAG.getBitcast(
18905               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
18906         }
18907       }
18908     }
18909   }
18910
18911   // Canonicalize shuffles according to rules:
18912   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
18913   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
18914   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
18915   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
18916       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
18917       TLI.isTypeLegal(VT)) {
18918     // The incoming shuffle must be of the same type as the result of the
18919     // current shuffle.
18920     assert(N1->getOperand(0).getValueType() == VT &&
18921            "Shuffle types don't match");
18922
18923     SDValue SV0 = N1->getOperand(0);
18924     SDValue SV1 = N1->getOperand(1);
18925     bool HasSameOp0 = N0 == SV0;
18926     bool IsSV1Undef = SV1.isUndef();
18927     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18928       // Commute the operands of this shuffle so that next rule
18929       // will trigger.
18930       return DAG.getCommutedVectorShuffle(*SVN);
18931   }
18932
18933   // Try to fold according to rules:
18934   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18935   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18936   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18937   // Don't try to fold shuffles with illegal type.
18938   // Only fold if this shuffle is the only user of the other shuffle.
18939   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18940       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18941     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18942
18943     // Don't try to fold splats; they're likely to simplify somehow, or they
18944     // might be free.
18945     if (OtherSV->isSplat())
18946       return SDValue();
18947
18948     // The incoming shuffle must be of the same type as the result of the
18949     // current shuffle.
18950     assert(OtherSV->getOperand(0).getValueType() == VT &&
18951            "Shuffle types don't match");
18952
18953     SDValue SV0, SV1;
18954     SmallVector<int, 4> Mask;
18955     // Compute the combined shuffle mask for a shuffle with SV0 as the first
18956     // operand, and SV1 as the second operand.
18957     for (unsigned i = 0; i != NumElts; ++i) {
18958       int Idx = SVN->getMaskElt(i);
18959       if (Idx < 0) {
18960         // Propagate Undef.
18961         Mask.push_back(Idx);
18962         continue;
18963       }
18964
18965       SDValue CurrentVec;
18966       if (Idx < (int)NumElts) {
18967         // This shuffle index refers to the inner shuffle N0. Lookup the inner
18968         // shuffle mask to identify which vector is actually referenced.
18969         Idx = OtherSV->getMaskElt(Idx);
18970         if (Idx < 0) {
18971           // Propagate Undef.
18972           Mask.push_back(Idx);
18973           continue;
18974         }
18975
18976         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18977                                            : OtherSV->getOperand(1);
18978       } else {
18979         // This shuffle index references an element within N1.
18980         CurrentVec = N1;
18981       }
18982
18983       // Simple case where 'CurrentVec' is UNDEF.
18984       if (CurrentVec.isUndef()) {
18985         Mask.push_back(-1);
18986         continue;
18987       }
18988
18989       // Canonicalize the shuffle index. We don't know yet if CurrentVec
18990       // will be the first or second operand of the combined shuffle.
18991       Idx = Idx % NumElts;
18992       if (!SV0.getNode() || SV0 == CurrentVec) {
18993         // Ok. CurrentVec is the left hand side.
18994         // Update the mask accordingly.
18995         SV0 = CurrentVec;
18996         Mask.push_back(Idx);
18997         continue;
18998       }
18999
19000       // Bail out if we cannot convert the shuffle pair into a single shuffle.
19001       if (SV1.getNode() && SV1 != CurrentVec)
19002         return SDValue();
19003
19004       // Ok. CurrentVec is the right hand side.
19005       // Update the mask accordingly.
19006       SV1 = CurrentVec;
19007       Mask.push_back(Idx + NumElts);
19008     }
19009
19010     // Check if all indices in Mask are Undef. In case, propagate Undef.
19011     bool isUndefMask = true;
19012     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19013       isUndefMask &= Mask[i] < 0;
19014
19015     if (isUndefMask)
19016       return DAG.getUNDEF(VT);
19017
19018     if (!SV0.getNode())
19019       SV0 = DAG.getUNDEF(VT);
19020     if (!SV1.getNode())
19021       SV1 = DAG.getUNDEF(VT);
19022
19023     // Avoid introducing shuffles with illegal mask.
19024     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19025     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19026     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19027     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19028     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19029     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19030     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
19031   }
19032
19033   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19034     return V;
19035
19036   return SDValue();
19037 }
19038
19039 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19040   SDValue InVal = N->getOperand(0);
19041   EVT VT = N->getValueType(0);
19042
19043   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19044   // with a VECTOR_SHUFFLE and possible truncate.
19045   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19046     SDValue InVec = InVal->getOperand(0);
19047     SDValue EltNo = InVal->getOperand(1);
19048     auto InVecT = InVec.getValueType();
19049     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19050       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19051       int Elt = C0->getZExtValue();
19052       NewMask[0] = Elt;
19053       // If we have an implict truncate do truncate here as long as it's legal.
19054       // if it's not legal, this should
19055       if (VT.getScalarType() != InVal.getValueType() &&
19056           InVal.getValueType().isScalarInteger() &&
19057           isTypeLegal(VT.getScalarType())) {
19058         SDValue Val =
19059             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19060         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19061       }
19062       if (VT.getScalarType() == InVecT.getScalarType() &&
19063           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
19064         SDValue LegalShuffle =
19065           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
19066                                       DAG.getUNDEF(InVecT), NewMask, DAG);
19067         if (LegalShuffle) {
19068           // If the initial vector is the correct size this shuffle is a
19069           // valid result.
19070           if (VT == InVecT)
19071             return LegalShuffle;
19072           // If not we must truncate the vector.
19073           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19074             MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
19075             SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
19076             EVT SubVT =
19077                 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
19078                                  VT.getVectorNumElements());
19079             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
19080                                LegalShuffle, ZeroIdx);
19081           }
19082         }
19083       }
19084     }
19085   }
19086
19087   return SDValue();
19088 }
19089
19090 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
19091   EVT VT = N->getValueType(0);
19092   SDValue N0 = N->getOperand(0);
19093   SDValue N1 = N->getOperand(1);
19094   SDValue N2 = N->getOperand(2);
19095
19096   // If inserting an UNDEF, just return the original vector.
19097   if (N1.isUndef())
19098     return N0;
19099
19100   // If this is an insert of an extracted vector into an undef vector, we can
19101   // just use the input to the extract.
19102   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19103       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19104     return N1.getOperand(0);
19105
19106   // If we are inserting a bitcast value into an undef, with the same
19107   // number of elements, just use the bitcast input of the extract.
19108   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
19109   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
19110   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19111       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19112       N1.getOperand(0).getOperand(1) == N2 &&
19113       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
19114           VT.getVectorNumElements() &&
19115       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
19116           VT.getSizeInBits()) {
19117     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19118   }
19119
19120   // If both N1 and N2 are bitcast values on which insert_subvector
19121   // would makes sense, pull the bitcast through.
19122   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19123   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19124   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19125     SDValue CN0 = N0.getOperand(0);
19126     SDValue CN1 = N1.getOperand(0);
19127     EVT CN0VT = CN0.getValueType();
19128     EVT CN1VT = CN1.getValueType();
19129     if (CN0VT.isVector() && CN1VT.isVector() &&
19130         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19131         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
19132       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19133                                       CN0.getValueType(), CN0, CN1, N2);
19134       return DAG.getBitcast(VT, NewINSERT);
19135     }
19136   }
19137
19138   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19139   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19140   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19141   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19142       N0.getOperand(1).getValueType() == N1.getValueType() &&
19143       N0.getOperand(2) == N2)
19144     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19145                        N1, N2);
19146
19147   // Eliminate an intermediate insert into an undef vector:
19148   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19149   // insert_subvector undef, X, N2
19150   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19151       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19152     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19153                        N1.getOperand(1), N2);
19154
19155   if (!isa<ConstantSDNode>(N2))
19156     return SDValue();
19157
19158   uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19159
19160   // Push subvector bitcasts to the output, adjusting the index as we go.
19161   // insert_subvector(bitcast(v), bitcast(s), c1)
19162   // -> bitcast(insert_subvector(v, s, c2))
19163   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19164       N1.getOpcode() == ISD::BITCAST) {
19165     SDValue N0Src = peekThroughBitcasts(N0);
19166     SDValue N1Src = peekThroughBitcasts(N1);
19167     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19168     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19169     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19170         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19171       EVT NewVT;
19172       SDLoc DL(N);
19173       SDValue NewIdx;
19174       MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
19175       LLVMContext &Ctx = *DAG.getContext();
19176       unsigned NumElts = VT.getVectorNumElements();
19177       unsigned EltSizeInBits = VT.getScalarSizeInBits();
19178       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19179         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19180         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19181         NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
19182       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
19183         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19184         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19185           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19186           NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
19187         }
19188       }
19189       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19190         SDValue Res = DAG.getBitcast(NewVT, N0Src);
19191         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
19192         return DAG.getBitcast(VT, Res);
19193       }
19194     }
19195   }
19196
19197   // Canonicalize insert_subvector dag nodes.
19198   // Example:
19199   // (insert_subvector (insert_subvector A, Idx0), Idx1)
19200   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
19201   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
19202       N1.getValueType() == N0.getOperand(1).getValueType() &&
19203       isa<ConstantSDNode>(N0.getOperand(2))) {
19204     unsigned OtherIdx = N0.getConstantOperandVal(2);
19205     if (InsIdx < OtherIdx) {
19206       // Swap nodes.
19207       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
19208                                   N0.getOperand(0), N1, N2);
19209       AddToWorklist(NewOp.getNode());
19210       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
19211                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
19212     }
19213   }
19214
19215   // If the input vector is a concatenation, and the insert replaces
19216   // one of the pieces, we can optimize into a single concat_vectors.
19217   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
19218       N0.getOperand(0).getValueType() == N1.getValueType()) {
19219     unsigned Factor = N1.getValueType().getVectorNumElements();
19220
19221     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
19222     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
19223
19224     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19225   }
19226
19227   // Simplify source operands based on insertion.
19228   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19229     return SDValue(N, 0);
19230
19231   return SDValue();
19232 }
19233
19234 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19235   SDValue N0 = N->getOperand(0);
19236
19237   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19238   if (N0->getOpcode() == ISD::FP16_TO_FP)
19239     return N0->getOperand(0);
19240
19241   return SDValue();
19242 }
19243
19244 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19245   SDValue N0 = N->getOperand(0);
19246
19247   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19248   if (N0->getOpcode() == ISD::AND) {
19249     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
19250     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19251       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19252                          N0.getOperand(0));
19253     }
19254   }
19255
19256   return SDValue();
19257 }
19258
19259 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19260   SDValue N0 = N->getOperand(0);
19261   EVT VT = N0.getValueType();
19262   unsigned Opcode = N->getOpcode();
19263
19264   // VECREDUCE over 1-element vector is just an extract.
19265   if (VT.getVectorNumElements() == 1) {
19266     SDLoc dl(N);
19267     SDValue Res = DAG.getNode(
19268         ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
19269         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
19270     if (Res.getValueType() != N->getValueType(0))
19271       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19272     return Res;
19273   }
19274
19275   // On an boolean vector an and/or reduction is the same as a umin/umax
19276   // reduction. Convert them if the latter is legal while the former isn't.
19277   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19278     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19279         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
19280     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19281         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19282         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
19283       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19284   }
19285
19286   return SDValue();
19287 }
19288
19289 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
19290 /// with the destination vector and a zero vector.
19291 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
19292 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
19293 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
19294   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
19295
19296   EVT VT = N->getValueType(0);
19297   SDValue LHS = N->getOperand(0);
19298   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
19299   SDLoc DL(N);
19300
19301   // Make sure we're not running after operation legalization where it
19302   // may have custom lowered the vector shuffles.
19303   if (LegalOperations)
19304     return SDValue();
19305
19306   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19307     return SDValue();
19308
19309   EVT RVT = RHS.getValueType();
19310   unsigned NumElts = RHS.getNumOperands();
19311
19312   // Attempt to create a valid clear mask, splitting the mask into
19313   // sub elements and checking to see if each is
19314   // all zeros or all ones - suitable for shuffle masking.
19315   auto BuildClearMask = [&](int Split) {
19316     int NumSubElts = NumElts * Split;
19317     int NumSubBits = RVT.getScalarSizeInBits() / Split;
19318
19319     SmallVector<int, 8> Indices;
19320     for (int i = 0; i != NumSubElts; ++i) {
19321       int EltIdx = i / Split;
19322       int SubIdx = i % Split;
19323       SDValue Elt = RHS.getOperand(EltIdx);
19324       if (Elt.isUndef()) {
19325         Indices.push_back(-1);
19326         continue;
19327       }
19328
19329       APInt Bits;
19330       if (isa<ConstantSDNode>(Elt))
19331         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
19332       else if (isa<ConstantFPSDNode>(Elt))
19333         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
19334       else
19335         return SDValue();
19336
19337       // Extract the sub element from the constant bit mask.
19338       if (DAG.getDataLayout().isBigEndian()) {
19339         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
19340       } else {
19341         Bits.lshrInPlace(SubIdx * NumSubBits);
19342       }
19343
19344       if (Split > 1)
19345         Bits = Bits.trunc(NumSubBits);
19346
19347       if (Bits.isAllOnesValue())
19348         Indices.push_back(i);
19349       else if (Bits == 0)
19350         Indices.push_back(i + NumSubElts);
19351       else
19352         return SDValue();
19353     }
19354
19355     // Let's see if the target supports this vector_shuffle.
19356     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
19357     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
19358     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
19359       return SDValue();
19360
19361     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
19362     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
19363                                                    DAG.getBitcast(ClearVT, LHS),
19364                                                    Zero, Indices));
19365   };
19366
19367   // Determine maximum split level (byte level masking).
19368   int MaxSplit = 1;
19369   if (RVT.getScalarSizeInBits() % 8 == 0)
19370     MaxSplit = RVT.getScalarSizeInBits() / 8;
19371
19372   for (int Split = 1; Split <= MaxSplit; ++Split)
19373     if (RVT.getScalarSizeInBits() % Split == 0)
19374       if (SDValue S = BuildClearMask(Split))
19375         return S;
19376
19377   return SDValue();
19378 }
19379
19380 /// If a vector binop is performed on splat values, it may be profitable to
19381 /// extract, scalarize, and insert/splat.
19382 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
19383   SDValue N0 = N->getOperand(0);
19384   SDValue N1 = N->getOperand(1);
19385   unsigned Opcode = N->getOpcode();
19386   EVT VT = N->getValueType(0);
19387   EVT EltVT = VT.getVectorElementType();
19388   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19389
19390   // TODO: Remove/replace the extract cost check? If the elements are available
19391   //       as scalars, then there may be no extract cost. Should we ask if
19392   //       inserting a scalar back into a vector is cheap instead?
19393   int Index0, Index1;
19394   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19395   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19396   if (!Src0 || !Src1 || Index0 != Index1 ||
19397       Src0.getValueType().getVectorElementType() != EltVT ||
19398       Src1.getValueType().getVectorElementType() != EltVT ||
19399       !TLI.isExtractVecEltCheap(VT, Index0) ||
19400       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19401     return SDValue();
19402
19403   SDLoc DL(N);
19404   SDValue IndexC =
19405       DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19406   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19407   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19408   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19409
19410   // If all lanes but 1 are undefined, no need to splat the scalar result.
19411   // TODO: Keep track of undefs and use that info in the general case.
19412   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19413       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19414       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19415     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19416     // build_vec ..undef, (bo X, Y), undef...
19417     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
19418     Ops[Index0] = ScalarBO;
19419     return DAG.getBuildVector(VT, DL, Ops);
19420   }
19421
19422   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
19423   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
19424   return DAG.getBuildVector(VT, DL, Ops);
19425 }
19426
19427 /// Visit a binary vector operation, like ADD.
19428 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19429   assert(N->getValueType(0).isVector() &&
19430          "SimplifyVBinOp only works on vectors!");
19431
19432   SDValue LHS = N->getOperand(0);
19433   SDValue RHS = N->getOperand(1);
19434   SDValue Ops[] = {LHS, RHS};
19435   EVT VT = N->getValueType(0);
19436   unsigned Opcode = N->getOpcode();
19437
19438   // See if we can constant fold the vector operation.
19439   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19440           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19441     return Fold;
19442
19443   // Move unary shuffles with identical masks after a vector binop:
19444   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19445   //   --> shuffle (VBinOp A, B), Undef, Mask
19446   // This does not require type legality checks because we are creating the
19447   // same types of operations that are in the original sequence. We do have to
19448   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19449   // though. This code is adapted from the identical transform in instcombine.
19450   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19451       Opcode != ISD::UREM && Opcode != ISD::SREM &&
19452       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19453     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19454     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19455     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19456         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19457         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19458       SDLoc DL(N);
19459       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19460                                      RHS.getOperand(0), N->getFlags());
19461       SDValue UndefV = LHS.getOperand(1);
19462       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19463     }
19464   }
19465
19466   // The following pattern is likely to emerge with vector reduction ops. Moving
19467   // the binary operation ahead of insertion may allow using a narrower vector
19468   // instruction that has better performance than the wide version of the op:
19469   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19470   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19471       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19472       LHS.getOperand(2) == RHS.getOperand(2) &&
19473       (LHS.hasOneUse() || RHS.hasOneUse())) {
19474     SDValue X = LHS.getOperand(1);
19475     SDValue Y = RHS.getOperand(1);
19476     SDValue Z = LHS.getOperand(2);
19477     EVT NarrowVT = X.getValueType();
19478     if (NarrowVT == Y.getValueType() &&
19479         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19480       // (binop undef, undef) may not return undef, so compute that result.
19481       SDLoc DL(N);
19482       SDValue VecC =
19483           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19484       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19485       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19486     }
19487   }
19488
19489   // Make sure all but the first op are undef or constant.
19490   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
19491     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
19492            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
19493                      [](const SDValue &Op) {
19494                        return Op.isUndef() ||
19495                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
19496                      });
19497   };
19498
19499   // The following pattern is likely to emerge with vector reduction ops. Moving
19500   // the binary operation ahead of the concat may allow using a narrower vector
19501   // instruction that has better performance than the wide version of the op:
19502   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
19503   //   concat (VBinOp X, Y), VecC
19504   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
19505       (LHS.hasOneUse() || RHS.hasOneUse())) {
19506     EVT NarrowVT = LHS.getOperand(0).getValueType();
19507     if (NarrowVT == RHS.getOperand(0).getValueType() &&
19508         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19509       SDLoc DL(N);
19510       unsigned NumOperands = LHS.getNumOperands();
19511       SmallVector<SDValue, 4> ConcatOps;
19512       for (unsigned i = 0; i != NumOperands; ++i) {
19513         // This constant fold for operands 1 and up.
19514         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
19515                                         RHS.getOperand(i)));
19516       }
19517
19518       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19519     }
19520   }
19521
19522   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19523     return V;
19524
19525   return SDValue();
19526 }
19527
19528 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19529                                     SDValue N2) {
19530   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
19531
19532   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19533                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
19534
19535   // If we got a simplified select_cc node back from SimplifySelectCC, then
19536   // break it down into a new SETCC node, and a new SELECT node, and then return
19537   // the SELECT node, since we were called with a SELECT node.
19538   if (SCC.getNode()) {
19539     // Check to see if we got a select_cc back (to turn into setcc/select).
19540     // Otherwise, just return whatever node we got back, like fabs.
19541     if (SCC.getOpcode() == ISD::SELECT_CC) {
19542       const SDNodeFlags Flags = N0.getNode()->getFlags();
19543       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
19544                                   N0.getValueType(),
19545                                   SCC.getOperand(0), SCC.getOperand(1),
19546                                   SCC.getOperand(4), Flags);
19547       AddToWorklist(SETCC.getNode());
19548       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19549                                          SCC.getOperand(2), SCC.getOperand(3));
19550       SelectNode->setFlags(Flags);
19551       return SelectNode;
19552     }
19553
19554     return SCC;
19555   }
19556   return SDValue();
19557 }
19558
19559 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19560 /// being selected between, see if we can simplify the select.  Callers of this
19561 /// should assume that TheSelect is deleted if this returns true.  As such, they
19562 /// should return the appropriate thing (e.g. the node) back to the top-level of
19563 /// the DAG combiner loop to avoid it being looked at.
19564 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19565                                     SDValue RHS) {
19566   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19567   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19568   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19569     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19570       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19571       SDValue Sqrt = RHS;
19572       ISD::CondCode CC;
19573       SDValue CmpLHS;
19574       const ConstantFPSDNode *Zero = nullptr;
19575
19576       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19577         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19578         CmpLHS = TheSelect->getOperand(0);
19579         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19580       } else {
19581         // SELECT or VSELECT
19582         SDValue Cmp = TheSelect->getOperand(0);
19583         if (Cmp.getOpcode() == ISD::SETCC) {
19584           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19585           CmpLHS = Cmp.getOperand(0);
19586           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19587         }
19588       }
19589       if (Zero && Zero->isZero() &&
19590           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19591           CC == ISD::SETULT || CC == ISD::SETLT)) {
19592         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19593         CombineTo(TheSelect, Sqrt);
19594         return true;
19595       }
19596     }
19597   }
19598   // Cannot simplify select with vector condition
19599   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19600
19601   // If this is a select from two identical things, try to pull the operation
19602   // through the select.
19603   if (LHS.getOpcode() != RHS.getOpcode() ||
19604       !LHS.hasOneUse() || !RHS.hasOneUse())
19605     return false;
19606
19607   // If this is a load and the token chain is identical, replace the select
19608   // of two loads with a load through a select of the address to load from.
19609   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19610   // constants have been dropped into the constant pool.
19611   if (LHS.getOpcode() == ISD::LOAD) {
19612     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19613     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19614
19615     // Token chains must be identical.
19616     if (LHS.getOperand(0) != RHS.getOperand(0) ||
19617         // Do not let this transformation reduce the number of volatile loads.
19618         // Be conservative for atomics for the moment
19619         // TODO: This does appear to be legal for unordered atomics (see D66309)
19620         !LLD->isSimple() || !RLD->isSimple() ||
19621         // FIXME: If either is a pre/post inc/dec load,
19622         // we'd need to split out the address adjustment.
19623         LLD->isIndexed() || RLD->isIndexed() ||
19624         // If this is an EXTLOAD, the VT's must match.
19625         LLD->getMemoryVT() != RLD->getMemoryVT() ||
19626         // If this is an EXTLOAD, the kind of extension must match.
19627         (LLD->getExtensionType() != RLD->getExtensionType() &&
19628          // The only exception is if one of the extensions is anyext.
19629          LLD->getExtensionType() != ISD::EXTLOAD &&
19630          RLD->getExtensionType() != ISD::EXTLOAD) ||
19631         // FIXME: this discards src value information.  This is
19632         // over-conservative. It would be beneficial to be able to remember
19633         // both potential memory locations.  Since we are discarding
19634         // src value info, don't do the transformation if the memory
19635         // locations are not in the default address space.
19636         LLD->getPointerInfo().getAddrSpace() != 0 ||
19637         RLD->getPointerInfo().getAddrSpace() != 0 ||
19638         // We can't produce a CMOV of a TargetFrameIndex since we won't
19639         // generate the address generation required.
19640         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19641         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19642         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19643                                       LLD->getBasePtr().getValueType()))
19644       return false;
19645
19646     // The loads must not depend on one another.
19647     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19648       return false;
19649
19650     // Check that the select condition doesn't reach either load.  If so,
19651     // folding this will induce a cycle into the DAG.  If not, this is safe to
19652     // xform, so create a select of the addresses.
19653
19654     SmallPtrSet<const SDNode *, 32> Visited;
19655     SmallVector<const SDNode *, 16> Worklist;
19656
19657     // Always fail if LLD and RLD are not independent. TheSelect is a
19658     // predecessor to all Nodes in question so we need not search past it.
19659
19660     Visited.insert(TheSelect);
19661     Worklist.push_back(LLD);
19662     Worklist.push_back(RLD);
19663
19664     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19665         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19666       return false;
19667
19668     SDValue Addr;
19669     if (TheSelect->getOpcode() == ISD::SELECT) {
19670       // We cannot do this optimization if any pair of {RLD, LLD} is a
19671       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19672       // Loads, we only need to check if CondNode is a successor to one of the
19673       // loads. We can further avoid this if there's no use of their chain
19674       // value.
19675       SDNode *CondNode = TheSelect->getOperand(0).getNode();
19676       Worklist.push_back(CondNode);
19677
19678       if ((LLD->hasAnyUseOfValue(1) &&
19679            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19680           (RLD->hasAnyUseOfValue(1) &&
19681            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19682         return false;
19683
19684       Addr = DAG.getSelect(SDLoc(TheSelect),
19685                            LLD->getBasePtr().getValueType(),
19686                            TheSelect->getOperand(0), LLD->getBasePtr(),
19687                            RLD->getBasePtr());
19688     } else {  // Otherwise SELECT_CC
19689       // We cannot do this optimization if any pair of {RLD, LLD} is a
19690       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19691       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19692       // one of the loads. We can further avoid this if there's no use of their
19693       // chain value.
19694
19695       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19696       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19697       Worklist.push_back(CondLHS);
19698       Worklist.push_back(CondRHS);
19699
19700       if ((LLD->hasAnyUseOfValue(1) &&
19701            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19702           (RLD->hasAnyUseOfValue(1) &&
19703            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19704         return false;
19705
19706       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19707                          LLD->getBasePtr().getValueType(),
19708                          TheSelect->getOperand(0),
19709                          TheSelect->getOperand(1),
19710                          LLD->getBasePtr(), RLD->getBasePtr(),
19711                          TheSelect->getOperand(4));
19712     }
19713
19714     SDValue Load;
19715     // It is safe to replace the two loads if they have different alignments,
19716     // but the new load must be the minimum (most restrictive) alignment of the
19717     // inputs.
19718     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19719     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19720     if (!RLD->isInvariant())
19721       MMOFlags &= ~MachineMemOperand::MOInvariant;
19722     if (!RLD->isDereferenceable())
19723       MMOFlags &= ~MachineMemOperand::MODereferenceable;
19724     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19725       // FIXME: Discards pointer and AA info.
19726       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19727                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19728                          MMOFlags);
19729     } else {
19730       // FIXME: Discards pointer and AA info.
19731       Load = DAG.getExtLoad(
19732           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19733                                                   : LLD->getExtensionType(),
19734           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19735           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19736     }
19737
19738     // Users of the select now use the result of the load.
19739     CombineTo(TheSelect, Load);
19740
19741     // Users of the old loads now use the new load's chain.  We know the
19742     // old-load value is dead now.
19743     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19744     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19745     return true;
19746   }
19747
19748   return false;
19749 }
19750
19751 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19752 /// bitwise 'and'.
19753 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19754                                             SDValue N1, SDValue N2, SDValue N3,
19755                                             ISD::CondCode CC) {
19756   // If this is a select where the false operand is zero and the compare is a
19757   // check of the sign bit, see if we can perform the "gzip trick":
19758   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19759   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19760   EVT XType = N0.getValueType();
19761   EVT AType = N2.getValueType();
19762   if (!isNullConstant(N3) || !XType.bitsGE(AType))
19763     return SDValue();
19764
19765   // If the comparison is testing for a positive value, we have to invert
19766   // the sign bit mask, so only do that transform if the target has a bitwise
19767   // 'and not' instruction (the invert is free).
19768   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19769     // (X > -1) ? A : 0
19770     // (X >  0) ? X : 0 <-- This is canonical signed max.
19771     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19772       return SDValue();
19773   } else if (CC == ISD::SETLT) {
19774     // (X <  0) ? A : 0
19775     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
19776     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19777       return SDValue();
19778   } else {
19779     return SDValue();
19780   }
19781
19782   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19783   // constant.
19784   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19785   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19786   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19787     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19788     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19789     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19790     AddToWorklist(Shift.getNode());
19791
19792     if (XType.bitsGT(AType)) {
19793       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19794       AddToWorklist(Shift.getNode());
19795     }
19796
19797     if (CC == ISD::SETGT)
19798       Shift = DAG.getNOT(DL, Shift, AType);
19799
19800     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19801   }
19802
19803   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19804   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19805   AddToWorklist(Shift.getNode());
19806
19807   if (XType.bitsGT(AType)) {
19808     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19809     AddToWorklist(Shift.getNode());
19810   }
19811
19812   if (CC == ISD::SETGT)
19813     Shift = DAG.getNOT(DL, Shift, AType);
19814
19815   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19816 }
19817
19818 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
19819 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19820 /// in it. This may be a win when the constant is not otherwise available
19821 /// because it replaces two constant pool loads with one.
19822 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19823     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19824     ISD::CondCode CC) {
19825   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
19826     return SDValue();
19827
19828   // If we are before legalize types, we want the other legalization to happen
19829   // first (for example, to avoid messing with soft float).
19830   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19831   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19832   EVT VT = N2.getValueType();
19833   if (!TV || !FV || !TLI.isTypeLegal(VT))
19834     return SDValue();
19835
19836   // If a constant can be materialized without loads, this does not make sense.
19837   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19838       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19839       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19840     return SDValue();
19841
19842   // If both constants have multiple uses, then we won't need to do an extra
19843   // load. The values are likely around in registers for other users.
19844   if (!TV->hasOneUse() && !FV->hasOneUse())
19845     return SDValue();
19846
19847   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19848                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19849   Type *FPTy = Elts[0]->getType();
19850   const DataLayout &TD = DAG.getDataLayout();
19851
19852   // Create a ConstantArray of the two constants.
19853   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19854   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19855                                       TD.getPrefTypeAlignment(FPTy));
19856   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19857
19858   // Get offsets to the 0 and 1 elements of the array, so we can select between
19859   // them.
19860   SDValue Zero = DAG.getIntPtrConstant(0, DL);
19861   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19862   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19863   SDValue Cond =
19864       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19865   AddToWorklist(Cond.getNode());
19866   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19867   AddToWorklist(CstOffset.getNode());
19868   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19869   AddToWorklist(CPIdx.getNode());
19870   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19871                      MachinePointerInfo::getConstantPool(
19872                          DAG.getMachineFunction()), Alignment);
19873 }
19874
19875 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19876 /// where 'cond' is the comparison specified by CC.
19877 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19878                                       SDValue N2, SDValue N3, ISD::CondCode CC,
19879                                       bool NotExtCompare) {
19880   // (x ? y : y) -> y.
19881   if (N2 == N3) return N2;
19882
19883   EVT CmpOpVT = N0.getValueType();
19884   EVT CmpResVT = getSetCCResultType(CmpOpVT);
19885   EVT VT = N2.getValueType();
19886   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19887   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19888   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19889
19890   // Determine if the condition we're dealing with is constant.
19891   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19892     AddToWorklist(SCC.getNode());
19893     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
19894       // fold select_cc true, x, y -> x
19895       // fold select_cc false, x, y -> y
19896       return !(SCCC->isNullValue()) ? N2 : N3;
19897     }
19898   }
19899
19900   if (SDValue V =
19901           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
19902     return V;
19903
19904   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
19905     return V;
19906
19907   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
19908   // where y is has a single bit set.
19909   // A plaintext description would be, we can turn the SELECT_CC into an AND
19910   // when the condition can be materialized as an all-ones register.  Any
19911   // single bit-test can be materialized as an all-ones register with
19912   // shift-left and shift-right-arith.
19913   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
19914       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
19915     SDValue AndLHS = N0->getOperand(0);
19916     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
19917     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
19918       // Shift the tested bit over the sign bit.
19919       const APInt &AndMask = ConstAndRHS->getAPIntValue();
19920       SDValue ShlAmt =
19921         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
19922                         getShiftAmountTy(AndLHS.getValueType()));
19923       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
19924
19925       // Now arithmetic right shift it all the way over, so the result is either
19926       // all-ones, or zero.
19927       SDValue ShrAmt =
19928         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
19929                         getShiftAmountTy(Shl.getValueType()));
19930       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
19931
19932       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
19933     }
19934   }
19935
19936   // fold select C, 16, 0 -> shl C, 4
19937   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
19938   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
19939
19940   if ((Fold || Swap) &&
19941       TLI.getBooleanContents(CmpOpVT) ==
19942           TargetLowering::ZeroOrOneBooleanContent &&
19943       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
19944
19945     if (Swap) {
19946       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
19947       std::swap(N2C, N3C);
19948     }
19949
19950     // If the caller doesn't want us to simplify this into a zext of a compare,
19951     // don't do it.
19952     if (NotExtCompare && N2C->isOne())
19953       return SDValue();
19954
19955     SDValue Temp, SCC;
19956     // zext (setcc n0, n1)
19957     if (LegalTypes) {
19958       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
19959       if (VT.bitsLT(SCC.getValueType()))
19960         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
19961       else
19962         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
19963     } else {
19964       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
19965       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
19966     }
19967
19968     AddToWorklist(SCC.getNode());
19969     AddToWorklist(Temp.getNode());
19970
19971     if (N2C->isOne())
19972       return Temp;
19973
19974     // shl setcc result by log2 n2c
19975     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
19976                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
19977                                        SDLoc(Temp),
19978                                        getShiftAmountTy(Temp.getValueType())));
19979   }
19980
19981   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
19982   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
19983   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
19984   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
19985   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
19986   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
19987   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
19988   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
19989   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
19990     SDValue ValueOnZero = N2;
19991     SDValue Count = N3;
19992     // If the condition is NE instead of E, swap the operands.
19993     if (CC == ISD::SETNE)
19994       std::swap(ValueOnZero, Count);
19995     // Check if the value on zero is a constant equal to the bits in the type.
19996     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
19997       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
19998         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
19999         // legal, combine to just cttz.
20000         if ((Count.getOpcode() == ISD::CTTZ ||
20001              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20002             N0 == Count.getOperand(0) &&
20003             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20004           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20005         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20006         // legal, combine to just ctlz.
20007         if ((Count.getOpcode() == ISD::CTLZ ||
20008              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20009             N0 == Count.getOperand(0) &&
20010             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20011           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20012       }
20013     }
20014   }
20015
20016   return SDValue();
20017 }
20018
20019 /// This is a stub for TargetLowering::SimplifySetCC.
20020 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20021                                    ISD::CondCode Cond, const SDLoc &DL,
20022                                    bool foldBooleans) {
20023   TargetLowering::DAGCombinerInfo
20024     DagCombineInfo(DAG, Level, false, this);
20025   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20026 }
20027
20028 /// Given an ISD::SDIV node expressing a divide by constant, return
20029 /// a DAG expression to select that will generate the same value by multiplying
20030 /// by a magic number.
20031 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20032 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20033   // when optimising for minimum size, we don't want to expand a div to a mul
20034   // and a shift.
20035   if (DAG.getMachineFunction().getFunction().hasMinSize())
20036     return SDValue();
20037
20038   SmallVector<SDNode *, 8> Built;
20039   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20040     for (SDNode *N : Built)
20041       AddToWorklist(N);
20042     return S;
20043   }
20044
20045   return SDValue();
20046 }
20047
20048 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20049 /// DAG expression that will generate the same value by right shifting.
20050 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20051   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20052   if (!C)
20053     return SDValue();
20054
20055   // Avoid division by zero.
20056   if (C->isNullValue())
20057     return SDValue();
20058
20059   SmallVector<SDNode *, 8> Built;
20060   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20061     for (SDNode *N : Built)
20062       AddToWorklist(N);
20063     return S;
20064   }
20065
20066   return SDValue();
20067 }
20068
20069 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20070 /// expression that will generate the same value by multiplying by a magic
20071 /// number.
20072 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20073 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20074   // when optimising for minimum size, we don't want to expand a div to a mul
20075   // and a shift.
20076   if (DAG.getMachineFunction().getFunction().hasMinSize())
20077     return SDValue();
20078
20079   SmallVector<SDNode *, 8> Built;
20080   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20081     for (SDNode *N : Built)
20082       AddToWorklist(N);
20083     return S;
20084   }
20085
20086   return SDValue();
20087 }
20088
20089 /// Determines the LogBase2 value for a non-null input value using the
20090 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20091 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20092   EVT VT = V.getValueType();
20093   unsigned EltBits = VT.getScalarSizeInBits();
20094   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20095   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20096   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20097   return LogBase2;
20098 }
20099
20100 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20101 /// For the reciprocal, we need to find the zero of the function:
20102 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
20103 ///     =>
20104 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20105 ///     does not require additional intermediate precision]
20106 /// For the last iteration, put numerator N into it to gain more precision:
20107 ///   Result = N X_i + X_i (N - N A X_i)
20108 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
20109                                       SDNodeFlags Flags) {
20110   if (Level >= AfterLegalizeDAG)
20111     return SDValue();
20112
20113   // TODO: Handle half and/or extended types?
20114   EVT VT = Op.getValueType();
20115   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20116     return SDValue();
20117
20118   // If estimates are explicitly disabled for this function, we're done.
20119   MachineFunction &MF = DAG.getMachineFunction();
20120   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20121   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20122     return SDValue();
20123
20124   // Estimates may be explicitly enabled for this type with a custom number of
20125   // refinement steps.
20126   int Iterations = TLI.getDivRefinementSteps(VT, MF);
20127   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20128     AddToWorklist(Est.getNode());
20129
20130     SDLoc DL(Op);
20131     if (Iterations) {
20132       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20133
20134       // Newton iterations: Est = Est + Est (N - Arg * Est)
20135       // If this is the last iteration, also multiply by the numerator.
20136       for (int i = 0; i < Iterations; ++i) {
20137         SDValue MulEst = Est;
20138
20139         if (i == Iterations - 1) {
20140           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
20141           AddToWorklist(MulEst.getNode());
20142         }
20143
20144         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
20145         AddToWorklist(NewEst.getNode());
20146
20147         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
20148                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
20149         AddToWorklist(NewEst.getNode());
20150
20151         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20152         AddToWorklist(NewEst.getNode());
20153
20154         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
20155         AddToWorklist(Est.getNode());
20156       }
20157     } else {
20158       // If no iterations are available, multiply with N.
20159       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
20160       AddToWorklist(Est.getNode());
20161     }
20162
20163     return Est;
20164   }
20165
20166   return SDValue();
20167 }
20168
20169 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20170 /// For the reciprocal sqrt, we need to find the zero of the function:
20171 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20172 ///     =>
20173 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20174 /// As a result, we precompute A/2 prior to the iteration loop.
20175 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20176                                          unsigned Iterations,
20177                                          SDNodeFlags Flags, bool Reciprocal) {
20178   EVT VT = Arg.getValueType();
20179   SDLoc DL(Arg);
20180   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20181
20182   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20183   // this entire sequence requires only one FP constant.
20184   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20185   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20186
20187   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20188   for (unsigned i = 0; i < Iterations; ++i) {
20189     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20190     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20191     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20192     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20193   }
20194
20195   // If non-reciprocal square root is requested, multiply the result by Arg.
20196   if (!Reciprocal)
20197     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20198
20199   return Est;
20200 }
20201
20202 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20203 /// For the reciprocal sqrt, we need to find the zero of the function:
20204 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20205 ///     =>
20206 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
20207 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
20208                                          unsigned Iterations,
20209                                          SDNodeFlags Flags, bool Reciprocal) {
20210   EVT VT = Arg.getValueType();
20211   SDLoc DL(Arg);
20212   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
20213   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
20214
20215   // This routine must enter the loop below to work correctly
20216   // when (Reciprocal == false).
20217   assert(Iterations > 0);
20218
20219   // Newton iterations for reciprocal square root:
20220   // E = (E * -0.5) * ((A * E) * E + -3.0)
20221   for (unsigned i = 0; i < Iterations; ++i) {
20222     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20223     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20224     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20225
20226     // When calculating a square root at the last iteration build:
20227     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
20228     // (notice a common subexpression)
20229     SDValue LHS;
20230     if (Reciprocal || (i + 1) < Iterations) {
20231       // RSQRT: LHS = (E * -0.5)
20232       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
20233     } else {
20234       // SQRT: LHS = (A * E) * -0.5
20235       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
20236     }
20237
20238     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20239   }
20240
20241   return Est;
20242 }
20243
20244 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
20245 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
20246 /// Op can be zero.
20247 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
20248                                            bool Reciprocal) {
20249   if (Level >= AfterLegalizeDAG)
20250     return SDValue();
20251
20252   // TODO: Handle half and/or extended types?
20253   EVT VT = Op.getValueType();
20254   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20255     return SDValue();
20256
20257   // If estimates are explicitly disabled for this function, we're done.
20258   MachineFunction &MF = DAG.getMachineFunction();
20259   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
20260   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20261     return SDValue();
20262
20263   // Estimates may be explicitly enabled for this type with a custom number of
20264   // refinement steps.
20265   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
20266
20267   bool UseOneConstNR = false;
20268   if (SDValue Est =
20269       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
20270                           Reciprocal)) {
20271     AddToWorklist(Est.getNode());
20272
20273     if (Iterations) {
20274       Est = UseOneConstNR
20275             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
20276             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
20277
20278       if (!Reciprocal) {
20279         // The estimate is now completely wrong if the input was exactly 0.0 or
20280         // possibly a denormal. Force the answer to 0.0 for those cases.
20281         SDLoc DL(Op);
20282         EVT CCVT = getSetCCResultType(VT);
20283         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
20284         const Function &F = DAG.getMachineFunction().getFunction();
20285         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
20286         if (Denorms.getValueAsString().equals("ieee")) {
20287           // fabs(X) < SmallestNormal ? 0.0 : Est
20288           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
20289           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
20290           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
20291           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20292           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
20293           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
20294           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20295         } else {
20296           // X == 0.0 ? 0.0 : Est
20297           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20298           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
20299           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20300         }
20301       }
20302     }
20303     return Est;
20304   }
20305
20306   return SDValue();
20307 }
20308
20309 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20310   return buildSqrtEstimateImpl(Op, Flags, true);
20311 }
20312
20313 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20314   return buildSqrtEstimateImpl(Op, Flags, false);
20315 }
20316
20317 /// Return true if there is any possibility that the two addresses overlap.
20318 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
20319
20320   struct MemUseCharacteristics {
20321     bool IsVolatile;
20322     bool IsAtomic;
20323     SDValue BasePtr;
20324     int64_t Offset;
20325     Optional<int64_t> NumBytes;
20326     MachineMemOperand *MMO;
20327   };
20328
20329   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
20330     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
20331       int64_t Offset = 0;
20332       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
20333         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
20334                      ? C->getSExtValue()
20335                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
20336                            ? -1 * C->getSExtValue()
20337                            : 0;
20338       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
20339               Offset /*base offset*/,
20340               Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
20341               LSN->getMemOperand()};
20342     }
20343     if (const auto *LN = cast<LifetimeSDNode>(N))
20344       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
20345               (LN->hasOffset()) ? LN->getOffset() : 0,
20346               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
20347                                 : Optional<int64_t>(),
20348               (MachineMemOperand *)nullptr};
20349     // Default.
20350     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
20351             (int64_t)0 /*offset*/,
20352             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
20353   };
20354
20355   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
20356                         MUC1 = getCharacteristics(Op1);
20357
20358   // If they are to the same address, then they must be aliases.
20359   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
20360       MUC0.Offset == MUC1.Offset)
20361     return true;
20362
20363   // If they are both volatile then they cannot be reordered.
20364   if (MUC0.IsVolatile && MUC1.IsVolatile)
20365     return true;
20366
20367   // Be conservative about atomics for the moment
20368   // TODO: This is way overconservative for unordered atomics (see D66309)
20369   if (MUC0.IsAtomic && MUC1.IsAtomic)
20370     return true;
20371
20372   if (MUC0.MMO && MUC1.MMO) {
20373     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20374         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20375       return false;
20376   }
20377
20378   // Try to prove that there is aliasing, or that there is no aliasing. Either
20379   // way, we can return now. If nothing can be proved, proceed with more tests.
20380   bool IsAlias;
20381   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
20382                                        DAG, IsAlias))
20383     return IsAlias;
20384
20385   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
20386   // either are not known.
20387   if (!MUC0.MMO || !MUC1.MMO)
20388     return true;
20389
20390   // If one operation reads from invariant memory, and the other may store, they
20391   // cannot alias. These should really be checking the equivalent of mayWrite,
20392   // but it only matters for memory nodes other than load /store.
20393   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20394       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20395     return false;
20396
20397   // If we know required SrcValue1 and SrcValue2 have relatively large
20398   // alignment compared to the size and offset of the access, we may be able
20399   // to prove they do not alias. This check is conservative for now to catch
20400   // cases created by splitting vector types.
20401   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
20402   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
20403   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
20404   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
20405   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
20406       MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
20407       *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
20408     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
20409     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
20410
20411     // There is no overlap between these relatively aligned accesses of
20412     // similar size. Return no alias.
20413     if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
20414         (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
20415       return false;
20416   }
20417
20418   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20419                    ? CombinerGlobalAA
20420                    : DAG.getSubtarget().useAA();
20421 #ifndef NDEBUG
20422   if (CombinerAAOnlyFunc.getNumOccurrences() &&
20423       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
20424     UseAA = false;
20425 #endif
20426
20427   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20428     // Use alias analysis information.
20429     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20430     int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20431     int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20432     AliasResult AAResult = AA->alias(
20433         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20434                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20435         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20436                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20437     if (AAResult == NoAlias)
20438       return false;
20439   }
20440
20441   // Otherwise we have to assume they alias.
20442   return true;
20443 }
20444
20445 /// Walk up chain skipping non-aliasing memory nodes,
20446 /// looking for aliasing nodes and adding them to the Aliases vector.
20447 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20448                                    SmallVectorImpl<SDValue> &Aliases) {
20449   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
20450   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
20451
20452   // Get alias information for node.
20453   // TODO: relax aliasing for unordered atomics (see D66309)
20454   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
20455
20456   // Starting off.
20457   Chains.push_back(OriginalChain);
20458   unsigned Depth = 0;
20459
20460   // Attempt to improve chain by a single step
20461   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20462     switch (C.getOpcode()) {
20463     case ISD::EntryToken:
20464       // No need to mark EntryToken.
20465       C = SDValue();
20466       return true;
20467     case ISD::LOAD:
20468     case ISD::STORE: {
20469       // Get alias information for C.
20470       // TODO: Relax aliasing for unordered atomics (see D66309)
20471       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20472                       cast<LSBaseSDNode>(C.getNode())->isSimple();
20473       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20474         // Look further up the chain.
20475         C = C.getOperand(0);
20476         return true;
20477       }
20478       // Alias, so stop here.
20479       return false;
20480     }
20481
20482     case ISD::CopyFromReg:
20483       // Always forward past past CopyFromReg.
20484       C = C.getOperand(0);
20485       return true;
20486
20487     case ISD::LIFETIME_START:
20488     case ISD::LIFETIME_END: {
20489       // We can forward past any lifetime start/end that can be proven not to
20490       // alias the memory access.
20491       if (!isAlias(N, C.getNode())) {
20492         // Look further up the chain.
20493         C = C.getOperand(0);
20494         return true;
20495       }
20496       return false;
20497     }
20498     default:
20499       return false;
20500     }
20501   };
20502
20503   // Look at each chain and determine if it is an alias.  If so, add it to the
20504   // aliases list.  If not, then continue up the chain looking for the next
20505   // candidate.
20506   while (!Chains.empty()) {
20507     SDValue Chain = Chains.pop_back_val();
20508
20509     // Don't bother if we've seen Chain before.
20510     if (!Visited.insert(Chain.getNode()).second)
20511       continue;
20512
20513     // For TokenFactor nodes, look at each operand and only continue up the
20514     // chain until we reach the depth limit.
20515     //
20516     // FIXME: The depth check could be made to return the last non-aliasing
20517     // chain we found before we hit a tokenfactor rather than the original
20518     // chain.
20519     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20520       Aliases.clear();
20521       Aliases.push_back(OriginalChain);
20522       return;
20523     }
20524
20525     if (Chain.getOpcode() == ISD::TokenFactor) {
20526       // We have to check each of the operands of the token factor for "small"
20527       // token factors, so we queue them up.  Adding the operands to the queue
20528       // (stack) in reverse order maintains the original order and increases the
20529       // likelihood that getNode will find a matching token factor (CSE.)
20530       if (Chain.getNumOperands() > 16) {
20531         Aliases.push_back(Chain);
20532         continue;
20533       }
20534       for (unsigned n = Chain.getNumOperands(); n;)
20535         Chains.push_back(Chain.getOperand(--n));
20536       ++Depth;
20537       continue;
20538     }
20539     // Everything else
20540     if (ImproveChain(Chain)) {
20541       // Updated Chain Found, Consider new chain if one exists.
20542       if (Chain.getNode())
20543         Chains.push_back(Chain);
20544       ++Depth;
20545       continue;
20546     }
20547     // No Improved Chain Possible, treat as Alias.
20548     Aliases.push_back(Chain);
20549   }
20550 }
20551
20552 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20553 /// (aliasing node.)
20554 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20555   if (OptLevel == CodeGenOpt::None)
20556     return OldChain;
20557
20558   // Ops for replacing token factor.
20559   SmallVector<SDValue, 8> Aliases;
20560
20561   // Accumulate all the aliases to this node.
20562   GatherAllAliases(N, OldChain, Aliases);
20563
20564   // If no operands then chain to entry token.
20565   if (Aliases.size() == 0)
20566     return DAG.getEntryNode();
20567
20568   // If a single operand then chain to it.  We don't need to revisit it.
20569   if (Aliases.size() == 1)
20570     return Aliases[0];
20571
20572   // Construct a custom tailored token factor.
20573   return DAG.getTokenFactor(SDLoc(N), Aliases);
20574 }
20575
namespace {
// TODO: Replace with std::monostate when we move to C++17.
/// Empty type with a single value; all instances compare equal.
struct UnitT {};
UnitT Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
20582
20583 // This function tries to collect a bunch of potentially interesting
20584 // nodes to improve the chains of, all at once. This might seem
20585 // redundant, as this function gets called when visiting every store
20586 // node, so why not let the work be done on each store as it's visited?
20587 //
20588 // I believe this is mainly important because MergeConsecutiveStores
20589 // is unable to deal with merging stores of different sizes, so unless
20590 // we improve the chains of all the potential candidates up-front
20591 // before running MergeConsecutiveStores, it might only see some of
20592 // the nodes that will eventually be candidates, and then not be able
20593 // to go from a partially-merged state to the desired final
20594 // fully-merged state.
20595
20596 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
20597   SmallVector<StoreSDNode *, 8> ChainedStores;
20598   StoreSDNode *STChain = St;
20599   // Intervals records which offsets from BaseIndex have been covered. In
20600   // the common case, every store writes to the immediately previous address
20601   // space and thus merged with the previous interval at insertion time.
20602
20603   using IMap =
20604       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
20605   IMap::Allocator A;
20606   IMap Intervals(A);
20607
20608   // This holds the base pointer, index, and the offset in bytes from the base
20609   // pointer.
20610   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20611
20612   // We must have a base and an offset.
20613   if (!BasePtr.getBase().getNode())
20614     return false;
20615
20616   // Do not handle stores to undef base pointers.
20617   if (BasePtr.getBase().isUndef())
20618     return false;
20619
20620   // Add ST's interval.
20621   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
20622
20623   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
20624     // If the chain has more than one use, then we can't reorder the mem ops.
20625     if (!SDValue(Chain, 0)->hasOneUse())
20626       break;
20627     // TODO: Relax for unordered atomics (see D66309)
20628     if (!Chain->isSimple() || Chain->isIndexed())
20629       break;
20630
20631     // Find the base pointer and offset for this memory node.
20632     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
20633     // Check that the base pointer is the same as the original one.
20634     int64_t Offset;
20635     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
20636       break;
20637     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
20638     // Make sure we don't overlap with other intervals by checking the ones to
20639     // the left or right before inserting.
20640     auto I = Intervals.find(Offset);
20641     // If there's a next interval, we should end before it.
20642     if (I != Intervals.end() && I.start() < (Offset + Length))
20643       break;
20644     // If there's a previous interval, we should start after it.
20645     if (I != Intervals.begin() && (--I).stop() <= Offset)
20646       break;
20647     Intervals.insert(Offset, Offset + Length, Unit);
20648
20649     ChainedStores.push_back(Chain);
20650     STChain = Chain;
20651   }
20652
20653   // If we didn't find a chained store, exit.
20654   if (ChainedStores.size() == 0)
20655     return false;
20656
20657   // Improve all chained stores (St and ChainedStores members) starting from
20658   // where the store chain ended and return single TokenFactor.
20659   SDValue NewChain = STChain->getChain();
20660   SmallVector<SDValue, 8> TFOps;
20661   for (unsigned I = ChainedStores.size(); I;) {
20662     StoreSDNode *S = ChainedStores[--I];
20663     SDValue BetterChain = FindBetterChain(S, NewChain);
20664     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
20665         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
20666     TFOps.push_back(SDValue(S, 0));
20667     ChainedStores[I] = S;
20668   }
20669
20670   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20671   SDValue BetterChain = FindBetterChain(St, NewChain);
20672   SDValue NewST;
20673   if (St->isTruncatingStore())
20674     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
20675                               St->getBasePtr(), St->getMemoryVT(),
20676                               St->getMemOperand());
20677   else
20678     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
20679                          St->getBasePtr(), St->getMemOperand());
20680
20681   TFOps.push_back(NewST);
20682
20683   // If we improved every element of TFOps, then we've lost the dependence on
20684   // NewChain to successors of St and we need to add it back to TFOps. Do so at
20685   // the beginning to keep relative order consistent with FindBetterChains.
20686   auto hasImprovedChain = [&](SDValue ST) -> bool {
20687     return ST->getOperand(0) != NewChain;
20688   };
20689   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
20690   if (AddNewChain)
20691     TFOps.insert(TFOps.begin(), NewChain);
20692
20693   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
20694   CombineTo(St, TF);
20695
20696   // Add TF and its operands to the worklist.
20697   AddToWorklist(TF.getNode());
20698   for (const SDValue &Op : TF->ops())
20699     AddToWorklist(Op.getNode());
20700   AddToWorklist(STChain);
20701   return true;
20702 }
20703
20704 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20705   if (OptLevel == CodeGenOpt::None)
20706     return false;
20707
20708   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20709
20710   // We must have a base and an offset.
20711   if (!BasePtr.getBase().getNode())
20712     return false;
20713
20714   // Do not handle stores to undef base pointers.
20715   if (BasePtr.getBase().isUndef())
20716     return false;
20717
20718   // Directly improve a chain of disjoint stores starting at St.
20719   if (parallelizeChainedStores(St))
20720     return true;
20721
20722   // Improve St's Chain..
20723   SDValue BetterChain = FindBetterChain(St, St->getChain());
20724   if (St->getChain() != BetterChain) {
20725     replaceStoreChain(St, BetterChain);
20726     return true;
20727   }
20728   return false;
20729 }
20730
20731 /// This is the entry point for the file.
20732 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
20733                            CodeGenOpt::Level OptLevel) {
20734   /// This is the main entry point to this class.
20735   DAGCombiner(*this, AA, OptLevel).Run(Level);
20736 }