//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <string>
#include <tuple>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of loads sliced");
STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");

static cl::opt<bool>
CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
                 cl::desc("Enable DAG combiner's use of IR alias analysis"));

static cl::opt<bool>
UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
        cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
static cl::opt<std::string>
CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
                   cl::desc("Only use DAG-combiner alias analysis in this"
                            " function"));
#endif

/// Hidden option to stress test load slicing, i.e., when this option
/// is enabled, load slicing bypasses most of its profitability guards.
static cl::opt<bool>
StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                  cl::desc("Bypass the profitability model of load slicing"),
                  cl::init(false));
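
/// When this option is enabled, the combiner may split the address computation
/// out of an indexed load (see SplitIndexingFromLoad below); roughly, an
/// indexed load whose loaded value is unused can be replaced by the add that
/// produces the updated address.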
static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                  cl::desc("DAG combiner may split indexing from loads"));

namespace {

class DAGCombiner {
  SelectionDAG &DAG;
  const TargetLowering &TLI;
  CombineLevel Level;
  CodeGenOpt::Level OptLevel;
  bool LegalOperations = false;
  bool LegalTypes = false;
  bool ForCodeSize;

  /// Worklist of all of the nodes that need to be simplified.
  ///
  /// This must behave as a stack -- new nodes to process are pushed onto the
  /// back and when processing we pop off of the back.
  ///
  /// The worklist will not contain duplicates but may contain null entries
  /// due to nodes being deleted from the underlying DAG.
  SmallVector<SDNode *, 64> Worklist;

  /// Mapping from an SDNode to its position on the worklist.
  ///
  /// This is used to find and remove nodes from the worklist (by nulling
  /// them) when they are deleted from the underlying DAG. It relies on
  /// stable indices of nodes within the worklist.
  DenseMap<SDNode *, unsigned> WorklistMap;

  /// This records all nodes attempted to be added to the worklist since we
  /// considered a new worklist entry. Since we do not add duplicate nodes
  /// to the worklist, this is different from the tail of the worklist.
  SmallSetVector<SDNode *, 32> PruningList;

  /// Set of nodes which have been combined (at least once).
  ///
  /// This is used to allow us to reliably add any operands of a DAG node
  /// which have not yet been combined to the worklist.
  SmallPtrSet<SDNode *, 32> CombinedNodes;

  // AA - Used for DAG load/store alias analysis.
  AliasAnalysis *AA;

  /// When an instruction is simplified, add all users of the instruction to
  /// the worklist because they might be simplified further now.
  void AddUsersToWorklist(SDNode *N) {
    for (SDNode *Node : N->uses())
      AddToWorklist(Node);
  }

  // Prune potentially dangling nodes. This is called after any visit to a
  // node, but should also be called during a visit after any failed combine
  // which may have created a DAG node.
  void clearAddedDanglingWorklistEntries() {
    // Check any nodes added to the worklist to see if they are prunable.
    while (!PruningList.empty()) {
      auto *N = PruningList.pop_back_val();
      if (N->use_empty())
        recursivelyDeleteUnusedNodes(N);
    }
  }

  SDNode *getNextWorklistEntry() {
    // Before we do any work, remove nodes that are not in use.
    clearAddedDanglingWorklistEntries();
    SDNode *N = nullptr;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries.
    while (!N && !Worklist.empty()) {
      N = Worklist.pop_back_val();
    }

    if (N) {
      bool GoodWorklistEntry = WorklistMap.erase(N);
      (void)GoodWorklistEntry;
      assert(GoodWorklistEntry &&
             "Found a worklist entry without a corresponding map entry!");
    }
    return N;
  }

  /// Call the node-specific routine that folds each particular type of node.
  SDValue visit(SDNode *N);

public:
  DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), AA(AA) {
    ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
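
    // Find the widest legal simple type; store merging uses
    // MaximumLegalStoreInBits as an upper bound on the width of merged stores.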
    MaximumLegalStoreInBits = 0;
    for (MVT VT : MVT::all_valuetypes())
      if (EVT(VT).isSimple() && VT != MVT::Other &&
          TLI.isTypeLegal(EVT(VT)) &&
          VT.getSizeInBits() >= MaximumLegalStoreInBits)
        MaximumLegalStoreInBits = VT.getSizeInBits();
  }

  void ConsiderForPruning(SDNode *N) {
    // Mark this for potential pruning.
    PruningList.insert(N);
  }

  /// Add to the worklist making sure its instance is at the back (next to be
  /// processed).
  void AddToWorklist(SDNode *N) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Deleted Node added to Worklist");

    // Skip handle nodes as they can't usefully be combined and confuse the
    // zero-use deletion strategy.
    if (N->getOpcode() == ISD::HANDLENODE)
      return;

    ConsiderForPruning(N);
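
    // Only queue the node if it is not already present; recording its index
    // in WorklistMap lets removeFromWorklist null out the entry in constant
    // time rather than scanning the vector.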
    if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
      Worklist.push_back(N);
  }
232 void removeFromWorklist(SDNode *N) {
233 CombinedNodes.erase(N);
234 PruningList.remove(N);
236 auto It = WorklistMap.find(N);
237 if (It == WorklistMap.end())
238 return; // Not in the worklist.
240 // Null out the entry rather than erasing it to avoid a linear operation.
241 Worklist[It->second] = nullptr;
242 WorklistMap.erase(It);
245 void deleteAndRecombine(SDNode *N);
246 bool recursivelyDeleteUnusedNodes(SDNode *N);
248 /// Replaces all uses of the results of one DAG node with new values.
249 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
250 bool AddTo = true);
252 /// Replaces all uses of the results of one DAG node with new values.
253 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
254 return CombineTo(N, &Res, 1, AddTo);
257 /// Replaces all uses of the results of one DAG node with new values.
258 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
259 bool AddTo = true) {
260 SDValue To[] = { Res0, Res1 };
261 return CombineTo(N, To, 2, AddTo);
264 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

private:
  unsigned MaximumLegalStoreInBits;

  /// Check the specified integer node value to see if it can be simplified or
  /// if things it uses can be simplified by bit propagation.
  /// If so, return true.
  bool SimplifyDemandedBits(SDValue Op) {
    unsigned BitWidth = Op.getScalarValueSizeInBits();
    APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
    return SimplifyDemandedBits(Op, DemandedBits);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
    EVT VT = Op.getValueType();
    unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
  }

  /// Check the specified vector node value to see if it can be simplified or
  /// if things it uses can be simplified as it only uses some of the
  /// elements. If so, return true.
  bool SimplifyDemandedVectorElts(SDValue Op) {
    unsigned NumElts = Op.getValueType().getVectorNumElements();
    APInt DemandedElts = APInt::getAllOnesValue(NumElts);
    return SimplifyDemandedVectorElts(Op, DemandedElts);
  }

  bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                            const APInt &DemandedElts);
  bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
                                  bool AssumeSingleUse = false);

  bool CombineToPreIndexedLoadStore(SDNode *N);
  bool CombineToPostIndexedLoadStore(SDNode *N);
  SDValue SplitIndexingFromLoad(LoadSDNode *LD);
  bool SliceUpLoad(SDNode *N);

  // Scalars have size 0 to distinguish from singleton vectors.
  SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
  bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
  bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);

  /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
  /// load.
  ///
  /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
  /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
  /// \param EltNo index of the vector element to load.
  /// \param OriginalLoad load that EVE came from to be replaced.
  /// \returns EVE on success, SDValue() on failure.
  SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
                                       SDValue EltNo,
                                       LoadSDNode *OriginalLoad);
  void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
  SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
  SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
  SDValue PromoteIntBinOp(SDValue Op);
  SDValue PromoteIntShiftOp(SDValue Op);
  SDValue PromoteExtend(SDValue Op);
  bool PromoteLoad(SDValue Op);

  /// Call the node-specific routine that knows how to fold each
  /// particular type of node. If that doesn't do anything, try the
  /// target-specific DAG combines.
  SDValue combine(SDNode *N);

  // Visitation implementation - Implement dag node combining for different
  // node types. The semantics are as follows:
  // Return Value:
  //   SDValue.getNode() == 0 - No change was made
  //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
  //   otherwise              - N should be replaced by the returned Operand.
  //

  SDValue visitTokenFactor(SDNode *N);
  SDValue visitMERGE_VALUES(SDNode *N);
  SDValue visitADD(SDNode *N);
  SDValue visitADDLike(SDNode *N);
  SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
  SDValue visitSUB(SDNode *N);
  SDValue visitADDSAT(SDNode *N);
  SDValue visitSUBSAT(SDNode *N);
  SDValue visitADDC(SDNode *N);
  SDValue visitADDO(SDNode *N);
  SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitSUBC(SDNode *N);
  SDValue visitSUBO(SDNode *N);
  SDValue visitADDE(SDNode *N);
  SDValue visitADDCARRY(SDNode *N);
  SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
  SDValue visitSUBE(SDNode *N);
  SDValue visitSUBCARRY(SDNode *N);
  SDValue visitMUL(SDNode *N);
  SDValue useDivRem(SDNode *N);
  SDValue visitSDIV(SDNode *N);
  SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitUDIV(SDNode *N);
  SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitREM(SDNode *N);
  SDValue visitMULHU(SDNode *N);
  SDValue visitMULHS(SDNode *N);
  SDValue visitSMUL_LOHI(SDNode *N);
  SDValue visitUMUL_LOHI(SDNode *N);
  SDValue visitMULO(SDNode *N);
  SDValue visitIMINMAX(SDNode *N);
  SDValue visitAND(SDNode *N);
  SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitOR(SDNode *N);
  SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
  SDValue visitXOR(SDNode *N);
  SDValue SimplifyVBinOp(SDNode *N);
  SDValue visitSHL(SDNode *N);
  SDValue visitSRA(SDNode *N);
  SDValue visitSRL(SDNode *N);
  SDValue visitFunnelShift(SDNode *N);
  SDValue visitRotate(SDNode *N);
  SDValue visitABS(SDNode *N);
  SDValue visitBSWAP(SDNode *N);
  SDValue visitBITREVERSE(SDNode *N);
  SDValue visitCTLZ(SDNode *N);
  SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTTZ(SDNode *N);
  SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
  SDValue visitCTPOP(SDNode *N);
  SDValue visitSELECT(SDNode *N);
  SDValue visitVSELECT(SDNode *N);
  SDValue visitSELECT_CC(SDNode *N);
  SDValue visitSETCC(SDNode *N);
  SDValue visitSETCCCARRY(SDNode *N);
  SDValue visitSIGN_EXTEND(SDNode *N);
  SDValue visitZERO_EXTEND(SDNode *N);
  SDValue visitANY_EXTEND(SDNode *N);
  SDValue visitAssertExt(SDNode *N);
  SDValue visitSIGN_EXTEND_INREG(SDNode *N);
  SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
  SDValue visitTRUNCATE(SDNode *N);
  SDValue visitBITCAST(SDNode *N);
  SDValue visitBUILD_PAIR(SDNode *N);
  SDValue visitFADD(SDNode *N);
  SDValue visitFSUB(SDNode *N);
  SDValue visitFMUL(SDNode *N);
  SDValue visitFMA(SDNode *N);
  SDValue visitFDIV(SDNode *N);
  SDValue visitFREM(SDNode *N);
  SDValue visitFSQRT(SDNode *N);
  SDValue visitFCOPYSIGN(SDNode *N);
  SDValue visitFPOW(SDNode *N);
  SDValue visitSINT_TO_FP(SDNode *N);
  SDValue visitUINT_TO_FP(SDNode *N);
  SDValue visitFP_TO_SINT(SDNode *N);
  SDValue visitFP_TO_UINT(SDNode *N);
  SDValue visitFP_ROUND(SDNode *N);
  SDValue visitFP_ROUND_INREG(SDNode *N);
  SDValue visitFP_EXTEND(SDNode *N);
  SDValue visitFNEG(SDNode *N);
  SDValue visitFABS(SDNode *N);
  SDValue visitFCEIL(SDNode *N);
  SDValue visitFTRUNC(SDNode *N);
  SDValue visitFFLOOR(SDNode *N);
  SDValue visitFMINNUM(SDNode *N);
  SDValue visitFMAXNUM(SDNode *N);
  SDValue visitFMINIMUM(SDNode *N);
  SDValue visitFMAXIMUM(SDNode *N);
  SDValue visitBRCOND(SDNode *N);
  SDValue visitBR_CC(SDNode *N);
  SDValue visitLOAD(SDNode *N);

  SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
  SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

  SDValue visitSTORE(SDNode *N);
  SDValue visitLIFETIME_END(SDNode *N);
  SDValue visitINSERT_VECTOR_ELT(SDNode *N);
  SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
  SDValue visitBUILD_VECTOR(SDNode *N);
  SDValue visitCONCAT_VECTORS(SDNode *N);
  SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
  SDValue visitVECTOR_SHUFFLE(SDNode *N);
  SDValue visitSCALAR_TO_VECTOR(SDNode *N);
  SDValue visitINSERT_SUBVECTOR(SDNode *N);
  SDValue visitMLOAD(SDNode *N);
  SDValue visitMSTORE(SDNode *N);
  SDValue visitMGATHER(SDNode *N);
  SDValue visitMSCATTER(SDNode *N);
  SDValue visitFP_TO_FP16(SDNode *N);
  SDValue visitFP16_TO_FP(SDNode *N);
  SDValue visitVECREDUCE(SDNode *N);

  SDValue visitFADDForFMACombine(SDNode *N);
  SDValue visitFSUBForFMACombine(SDNode *N);
  SDValue visitFMULForFMADistributiveCombine(SDNode *N);

  SDValue XformToShuffleWithZero(SDNode *N);
  SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1);
  SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                         SDValue N1, SDNodeFlags Flags);

  SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

  SDValue foldSelectOfConstants(SDNode *N);
  SDValue foldVSelectOfConstants(SDNode *N);
  SDValue foldBinOpIntoSelect(SDNode *BO);
  bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
  SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
  SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
  SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                           SDValue N2, SDValue N3, ISD::CondCode CC,
                           bool NotExtCompare = false);
  SDValue convertSelectOfFPConstantsToLoadOffset(
      const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
      ISD::CondCode CC);
  SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
                                 SDValue N2, SDValue N3, ISD::CondCode CC);
  SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
                            const SDLoc &DL);
  SDValue unfoldMaskedMerge(SDNode *N);
  SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
  SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                        const SDLoc &DL, bool foldBooleans);
  SDValue rebuildSetCC(SDValue N);

  bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                         SDValue &CC) const;
  bool isOneUseSetCC(SDValue N) const;

  SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                     unsigned HiOp);
  SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
  SDValue CombineExtLoad(SDNode *N);
  SDValue CombineZExtLogicopShiftLoad(SDNode *N);
  SDValue combineRepeatedFPDivisors(SDNode *N);
  SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
  SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
  SDValue BuildSDIV(SDNode *N);
  SDValue BuildSDIVPow2(SDNode *N);
  SDValue BuildUDIV(SDNode *N);
  SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
  SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
  SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
  SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
                              SDNodeFlags Flags, bool Reciprocal);
  SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                             bool DemandHighBits = true);
  SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
  SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                            SDValue InnerPos, SDValue InnerNeg,
                            unsigned PosOpcode, unsigned NegOpcode,
                            const SDLoc &DL);
  SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
  SDValue MatchLoadCombine(SDNode *N);
  SDValue ReduceLoadWidth(SDNode *N);
  SDValue ReduceLoadOpStoreWidth(SDNode *N);
  SDValue splitMergedValStore(StoreSDNode *ST);
  SDValue TransformFPLoadStorePair(SDNode *N);
  SDValue convertBuildVecZextToZext(SDNode *N);
  SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
  SDValue reduceBuildVecToShuffle(SDNode *N);
  SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
                                ArrayRef<int> VectorMask, SDValue VecIn1,
                                SDValue VecIn2, unsigned LeftIdx,
                                bool DidSplitVec);
  SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);

  /// Walk up the chain, skipping non-aliasing memory nodes and looking for
  /// aliasing nodes, adding them to the Aliases vector.
  void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                        SmallVectorImpl<SDValue> &Aliases);

  /// Return true if there is any possibility that the two addresses overlap.
  bool isAlias(SDNode *Op0, SDNode *Op1) const;

  /// Walk up the chain, skipping non-aliasing memory nodes and looking for a
  /// better chain (aliasing node).
  SDValue FindBetterChain(SDNode *N, SDValue Chain);

  /// Try to replace a store and any possibly adjacent stores on
  /// consecutive chains with better chains. Return true only if St is
  /// replaced.
  ///
  /// Notice that other chains may still be replaced even if the function
  /// returns false.
  bool findBetterNeighborChains(StoreSDNode *St);

  // Helper for findBetterNeighborChains. Walk up the store chain, adding
  // additional chained stores that do not overlap and can be parallelized.
  bool parallelizeChainedStores(StoreSDNode *St);

  /// Holds a pointer to an LSBaseSDNode as well as information on where it
  /// is located in a sequence of memory operations connected by a chain.
  struct MemOpLink {
    // Ptr to the mem node.
    LSBaseSDNode *MemNode;

    // Offset from the base ptr.
    int64_t OffsetFromBase;

    MemOpLink(LSBaseSDNode *N, int64_t Offset)
        : MemNode(N), OffsetFromBase(Offset) {}
  };

  /// This is a helper function for visitMUL to check the profitability
  /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
  /// MulNode is the original multiply, AddNode is (add x, c1),
  /// and ConstNode is c2.
  bool isMulAddWithConstProfitable(SDNode *MulNode,
                                   SDValue &AddNode,
                                   SDValue &ConstNode);

  /// This is a helper function for visitAND and visitZERO_EXTEND. Returns
  /// true if the (and (load x) c) pattern matches an extload. ExtVT returns
  /// the type of the loaded value to be extended.
  bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                        EVT LoadResultTy, EVT &ExtVT);

  /// Helper function to calculate whether the given Load/Store can have its
  /// width reduced to ExtVT.
  bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
                         EVT &MemVT, unsigned ShAmt = 0);

  /// Used by BackwardsPropagateMask to find suitable loads.
  bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
                         SmallPtrSetImpl<SDNode*> &NodesWithConsts,
                         ConstantSDNode *Mask, SDNode *&NodeToMask);
  /// Attempt to propagate a given AND node back to load leaves so that they
  /// can be combined into narrow loads.
  bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);

  /// Helper function for MergeConsecutiveStores which merges the
  /// component store chains.
  SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
                              unsigned NumStores);

  /// This is a helper function for MergeConsecutiveStores. When the
  /// source elements of the consecutive stores are all constants or
  /// all extracted vector elements, try to merge them into one
  /// larger store introducing bitcasts if necessary. \return True
  /// if a merged store was created.
  bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                       EVT MemVT, unsigned NumStores,
                                       bool IsConstantSrc, bool UseVector,
                                       bool UseTrunc);

  /// This is a helper function for MergeConsecutiveStores. Stores
  /// that potentially may be merged with St are placed in
  /// StoreNodes. RootNode is a chain predecessor to all store
  /// candidates.
  void getStoreMergeCandidates(StoreSDNode *St,
                               SmallVectorImpl<MemOpLink> &StoreNodes,
                               SDNode *&Root);

  /// Helper function for MergeConsecutiveStores. Checks if
  /// candidate stores have indirect dependency through their
  /// operands. RootNode is the predecessor to all stores calculated
  /// by getStoreMergeCandidates and is used to prune the dependency check.
  /// \return True if safe to merge.
  bool checkMergeStoreCandidatesForDependencies(
      SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
      SDNode *RootNode);

  /// Merge consecutive store operations into a wide store.
  /// This optimization uses wide integers or vectors when possible.
  /// \return true if stores were merged into a wider store (the affected
  /// nodes are stored as a prefix in \p StoreNodes).
  bool MergeConsecutiveStores(StoreSDNode *St);

  /// Try to transform a truncation where C is a constant:
  ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
  ///
  /// \p N needs to be a truncation and its first operand an AND. Other
  /// requirements are checked by the function (e.g. that trunc is
  /// single-use); if they are not met, an empty SDValue is returned.
  SDValue distributeTruncateThroughAnd(SDNode *N);

  /// Helper function to determine whether the target supports the operation
  /// given by \p Opcode for type \p VT, that is, whether the operation
  /// is legal or custom before legalizing operations, and whether it is
  /// legal (but not custom) after legalization.
  bool hasOperation(unsigned Opcode, EVT VT) {
    if (LegalOperations)
      return TLI.isOperationLegal(Opcode, VT);
    return TLI.isOperationLegalOrCustom(Opcode, VT);
  }

public:
  /// Runs the dag combiner on all nodes in the worklist.
  void Run(CombineLevel AtLevel);

  SelectionDAG &getDAG() const { return DAG; }

  /// Returns a type large enough to hold any valid shift amount - before type
  /// legalization these can be huge.
  EVT getShiftAmountTy(EVT LHSTy) {
    assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
  }

  /// This method returns true if we are running before type legalization or
  /// if the specified VT is legal.
  bool isTypeLegal(const EVT &VT) {
    if (!LegalTypes) return true;
    return TLI.isTypeLegal(VT);
  }

  /// Convenience wrapper around TargetLowering::getSetCCResultType.
  EVT getSetCCResultType(EVT VT) const {
    return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
  }

  void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                       SDValue OrigLoad, SDValue ExtLoad,
                       ISD::NodeType ExtType);
};

/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistRemover(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};

class WorklistInserter : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;

public:
  explicit WorklistInserter(DAGCombiner &dc)
      : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // FIXME: Ideally we could add N to the worklist, but this causes
  // exponential compile time costs in large DAGs, e.g. Halide.
  void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//
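
// Note: DAGCombinerInfo::DC is an opaque pointer in the TargetLowering
// interface; within this file it always wraps the DAGCombiner instance,
// hence the casts below.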
void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
  for (const SDValue &Op : N->ops())
    // For an operand generating multiple values, one of the values may
    // become dead allowing further simplification (e.g. split index
    // arithmetic from an indexed load).
    if (Op->hasOneUse() || Op->getNumValues() > 1)
      AddToWorklist(Op.getNode());

  DAG.DeleteNode(N);
}

/// Return 1 if we can compute the negated form of the specified expression
/// for the same cost as the expression itself, 2 if we can compute the
/// negated form more cheaply than the expression itself, or 0 if negation
/// is not free.
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
                               const TargetLowering &TLI,
                               const TargetOptions *Options,
                               bool ForCodeSize,
                               unsigned Depth = 0) {
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return 2;

  // Don't allow anything with multiple uses unless we know it is free.
  EVT VT = Op.getValueType();
  const SDNodeFlags Flags = Op->getFlags();
  if (!Op.hasOneUse())
    if (!(Op.getOpcode() == ISD::FP_EXTEND &&
          TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
      return 0;

  // Don't recurse exponentially.
  if (Depth > 6) return 0;

  switch (Op.getOpcode()) {
  default: return 0;
  case ISD::ConstantFP: {
    if (!LegalOperations)
      return 1;

    // Don't invert constant FP values after legalization unless the target
    // says the negated constant is legal.
    return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
                            ForCodeSize);
  }
  case ISD::FADD:
    if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
      return 0;

    // After operation legalization, it might not be legal to create new FSUBs.
    if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
      return 0;

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    if (!Options->NoSignedZerosFPMath &&
        !Flags.hasNoSignedZeros())
      return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, ForCodeSize, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              ForCodeSize, Depth + 1);
  }
}

/// If isNegatibleForFree returns a non-zero result, return the newly negated
/// expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, bool ForCodeSize,
                                    unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  const SDNodeFlags Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
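    // isNegatibleForFree only lets an FSUB through when signed zeros can be
    // ignored, which is what makes the two folds below safe.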
    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
                           Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, ForCodeSize,
                                              Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1));
  case ISD::FP_ROUND:
    return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, ForCodeSize,
                                            Depth+1),
                       Op.getOperand(1));
  }
}

// APInts must be the same size for most operations; this helper function
// zero-extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
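// For example (illustrative): an 8-bit LHS and a 16-bit RHS with Offset == 1
// are both zero-extended to 17 bits, leaving one bit of headroom.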
static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
  unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
  LHS = LHS.zextOrSelf(Bits);
  RHS = RHS.zextOrSelf(Bits);
}

// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
// the appropriate nodes based on the type of node we are checking. This
// simplifies life a bit for the callers.
bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                                    SDValue &CC) const {
  if (N.getOpcode() == ISD::SETCC) {
    LHS = N.getOperand(0);
    RHS = N.getOperand(1);
    CC  = N.getOperand(2);
    return true;
  }

  if (N.getOpcode() != ISD::SELECT_CC ||
      !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
      !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    return false;

  if (TLI.getBooleanContents(N.getValueType()) ==
      TargetLowering::UndefinedBooleanContent)
    return false;

  LHS = N.getOperand(0);
  RHS = N.getOperand(1);
  CC  = N.getOperand(4);
  return true;
}

/// Return true if this is a SetCC-equivalent operation with only one use.
/// If this is true, it allows the users to invert the operation for free when
/// it is profitable to do so.
bool DAGCombiner::isOneUseSetCC(SDValue N) const {
  SDValue N0, N1, N2;
  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    return true;
  return false;
}

// Returns the SDNode if it is a constant float BuildVector
// or constant float.
static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
  if (isa<ConstantFPSDNode>(N))
    return N.getNode();
  if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    return N.getNode();
  return nullptr;
}

// Determines if it is a constant integer or a build vector of constant
// integers (and undefs).
// Do not permit build vector implicit truncation.
static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
    return !(Const->isOpaque() && NoOpaques);
  if (N.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  unsigned BitWidth = N.getScalarValueSizeInBits();
  for (const SDValue &Op : N->op_values()) {
    if (Op.isUndef())
      continue;
    ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
    if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
        (Const->isOpaque() && NoOpaques))
      return false;
  }
  return true;
}

// Determines if a BUILD_VECTOR is composed of all constants possibly mixed
// with undefs.
static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
  if (V.getOpcode() != ISD::BUILD_VECTOR)
    return false;
  return isConstantOrConstantVector(V, NoOpaques) ||
         ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}

// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
                                               SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();

  if (N0.getOpcode() != Opc)
    return SDValue();

  // Don't reassociate reductions.
  if (N0->getFlags().hasVectorReduction())
    return SDValue();

  if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
      // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
      if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
        return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
      return SDValue();
    }
    if (N0.hasOneUse()) {
      // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
      // iff (op x, c1) has one use
      SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
      if (!OpNode.getNode())
        return SDValue();
      AddToWorklist(OpNode.getNode());
      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    }
  }
  return SDValue();
}

// Try to reassociate commutative binops.
SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1, SDNodeFlags Flags) {
  assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
  // Don't reassociate reductions.
  if (Flags.hasVectorReduction())
    return SDValue();
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
    return Combined;
  if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
    return Combined;
  return SDValue();
}

SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
             To[0].getNode()->dump(&DAG);
             dbgs() << " and " << NumTo - 1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist.
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses. If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph. The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
                                       const APInt &DemandedElts) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  KnownBits Known;
  if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
                                             const APInt &DemandedElts,
                                             bool AssumeSingleUse) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownUndef, KnownZero;
  if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                      TLO, 0, AssumeSingleUse))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
             dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
             dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc DL(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));

  LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
             Trunc.getNode()->dump(&DAG); dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc DL(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
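    // A plain (non-extending) load is rebuilt as an any-extending load of the
    // same memory type, so the value is produced directly in the wider
    // promoted type.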
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, DL, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::AssertZext:
    if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
      return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
    break;
  case ISD::Constant: {
    unsigned ExtOpc =
        Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, DL, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}

SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc DL(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it
/// is beneficial. E.g., on x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
    SDLoc DL(Op);

    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));

    // We are always replacing N0/N1's use in N and only need
    // additional replacements if there are additional uses.
    Replace0 &= !N0->hasOneUse();
    Replace1 &= (N0 != N1) && !N1->hasOneUse();

    // Combine Op here so it is preserved past replacements.
    CombineTo(Op.getNode(), RV);

    // If operands have a use ordering, make sure we deal with
    // predecessor first.
    if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
      std::swap(N0, N1);
      std::swap(NN0, NN1);
    }

    if (Replace0) {
      AddToWorklist(NN0.getNode());
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    }
    if (Replace1) {
      AddToWorklist(NN1.getNode());
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
    }
    return Op;
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it
/// is beneficial. E.g., on x86, it's usually better to promote i16 operations
/// to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
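    // The value being shifted must be widened to match the promoted type:
    // arithmetic right shifts need the sign bit replicated (sext), logical
    // right shifts need zeros in the new high bits (zext).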
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(N0, PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(N0, PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);

    if (!N0.getNode())
      return SDValue();

    SDLoc DL(Op);
    SDValue RV =
        DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    // Deal with Op being deleted.
    if (Op && Op.getOpcode() != ISD::DELETED_NODE)
      return RV;
  }
  return SDValue();
}

SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc DL(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
                                                      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);

    LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
               Result.getNode()->dump(&DAG); dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes that have had a user deleted to the worklist, as
/// they may now have only one use and be subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // Set the instance variables, so that the various visit routines may use
  // them.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;
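  // These flags gate the combines below: once the corresponding legalization
  // phase has run, new nodes must use only target-supported operations and
  // types.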
1463 WorklistInserter AddNodes(*this);
1465 // Add all the dag nodes to the worklist.
1466 for (SDNode &Node : DAG.allnodes())
1467 AddToWorklist(&Node);
1469 // Create a dummy node (which is not added to allnodes), that adds a reference
1470 // to the root node, preventing it from being deleted, and tracking any
1471 // changes of the root.
1472 HandleSDNode Dummy(DAG.getRoot());
1474 // While we have a valid worklist entry node, try to combine it.
1475 while (SDNode *N = getNextWorklistEntry()) {
1476 // If N has no uses, it is dead. Make sure to revisit all N's operands once
1477 // N is deleted from the DAG, since they too may now be dead or may have a
1478 // reduced number of uses, allowing other xforms.
1479 if (recursivelyDeleteUnusedNodes(N))
1480 continue;
1482 WorklistRemover DeadNodes(*this);
1484 // If this combine is running after legalizing the DAG, re-legalize any
1485 // nodes pulled off the worklist.
1486 if (Level == AfterLegalizeDAG) {
1487 SmallSetVector<SDNode *, 16> UpdatedNodes;
1488 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1490 for (SDNode *LN : UpdatedNodes) {
1491 AddToWorklist(LN);
1492 AddUsersToWorklist(LN);
1494 if (!NIsValid)
1495 continue;
1498 LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1500 // Add any operands of the new node which have not yet been combined to the
1501 // worklist as well. Because the worklist uniques things already, this
1502 // won't repeatedly process the same operand.
1503 CombinedNodes.insert(N);
1504 for (const SDValue &ChildN : N->op_values())
1505 if (!CombinedNodes.count(ChildN.getNode()))
1506 AddToWorklist(ChildN.getNode());
1508 SDValue RV = combine(N);
1510 if (!RV.getNode())
1511 continue;
1513 ++NodesCombined;
1515 // If we get back the same node we passed in, rather than a new node or
1516 // zero, we know that the node must have defined multiple values and
1517 // CombineTo was used. Since CombineTo takes care of the worklist
1518 // mechanics for us, we have no work to do in this case.
1519 if (RV.getNode() == N)
1520 continue;
1522 assert(N->getOpcode() != ISD::DELETED_NODE &&
1523 RV.getOpcode() != ISD::DELETED_NODE &&
1524 "Node was deleted but visit returned new node!");
1526 LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1528 if (N->getNumValues() == RV.getNode()->getNumValues())
1529 DAG.ReplaceAllUsesWith(N, RV.getNode());
1530 else {
1531 assert(N->getValueType(0) == RV.getValueType() &&
1532 N->getNumValues() == 1 && "Type mismatch");
1533 DAG.ReplaceAllUsesWith(N, &RV);
1536 // Push the new node and any users onto the worklist
1537 AddToWorklist(RV.getNode());
1538 AddUsersToWorklist(RV.getNode());
1540 // Finally, if the node is now dead, remove it from the graph. The node
1541 // may not be dead if the replacement process recursively simplified to
1542 // something else needing this node. This will also take care of adding any
1543 // operands which have lost a user to the worklist.
1544 recursivelyDeleteUnusedNodes(N);
1547 // If the root changed (e.g. it was a dead load), update the root.
1548 DAG.setRoot(Dummy.getValue());
1549 DAG.RemoveDeadNodes();
1552 SDValue DAGCombiner::visit(SDNode *N) {
1553 switch (N->getOpcode()) {
1554 default: break;
1555 case ISD::TokenFactor: return visitTokenFactor(N);
1556 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N);
1557 case ISD::ADD: return visitADD(N);
1558 case ISD::SUB: return visitSUB(N);
1559 case ISD::SADDSAT:
1560 case ISD::UADDSAT: return visitADDSAT(N);
1561 case ISD::SSUBSAT:
1562 case ISD::USUBSAT: return visitSUBSAT(N);
1563 case ISD::ADDC: return visitADDC(N);
1564 case ISD::SADDO:
1565 case ISD::UADDO: return visitADDO(N);
1566 case ISD::SUBC: return visitSUBC(N);
1567 case ISD::SSUBO:
1568 case ISD::USUBO: return visitSUBO(N);
1569 case ISD::ADDE: return visitADDE(N);
1570 case ISD::ADDCARRY: return visitADDCARRY(N);
1571 case ISD::SUBE: return visitSUBE(N);
1572 case ISD::SUBCARRY: return visitSUBCARRY(N);
1573 case ISD::MUL: return visitMUL(N);
1574 case ISD::SDIV: return visitSDIV(N);
1575 case ISD::UDIV: return visitUDIV(N);
1576 case ISD::SREM:
1577 case ISD::UREM: return visitREM(N);
1578 case ISD::MULHU: return visitMULHU(N);
1579 case ISD::MULHS: return visitMULHS(N);
1580 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
1581 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
1582 case ISD::SMULO:
1583 case ISD::UMULO: return visitMULO(N);
1584 case ISD::SMIN:
1585 case ISD::SMAX:
1586 case ISD::UMIN:
1587 case ISD::UMAX: return visitIMINMAX(N);
1588 case ISD::AND: return visitAND(N);
1589 case ISD::OR: return visitOR(N);
1590 case ISD::XOR: return visitXOR(N);
1591 case ISD::SHL: return visitSHL(N);
1592 case ISD::SRA: return visitSRA(N);
1593 case ISD::SRL: return visitSRL(N);
1594 case ISD::ROTR:
1595 case ISD::ROTL: return visitRotate(N);
1596 case ISD::FSHL:
1597 case ISD::FSHR: return visitFunnelShift(N);
1598 case ISD::ABS: return visitABS(N);
1599 case ISD::BSWAP: return visitBSWAP(N);
1600 case ISD::BITREVERSE: return visitBITREVERSE(N);
1601 case ISD::CTLZ: return visitCTLZ(N);
1602 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N);
1603 case ISD::CTTZ: return visitCTTZ(N);
1604 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
1605 case ISD::CTPOP: return visitCTPOP(N);
1606 case ISD::SELECT: return visitSELECT(N);
1607 case ISD::VSELECT: return visitVSELECT(N);
1608 case ISD::SELECT_CC: return visitSELECT_CC(N);
1609 case ISD::SETCC: return visitSETCC(N);
1610 case ISD::SETCCCARRY: return visitSETCCCARRY(N);
1611 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N);
1612 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N);
1613 case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
1614 case ISD::AssertSext:
1615 case ISD::AssertZext: return visitAssertExt(N);
1616 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
1617 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1618 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1619 case ISD::TRUNCATE: return visitTRUNCATE(N);
1620 case ISD::BITCAST: return visitBITCAST(N);
1621 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
1622 case ISD::FADD: return visitFADD(N);
1623 case ISD::FSUB: return visitFSUB(N);
1624 case ISD::FMUL: return visitFMUL(N);
1625 case ISD::FMA: return visitFMA(N);
1626 case ISD::FDIV: return visitFDIV(N);
1627 case ISD::FREM: return visitFREM(N);
1628 case ISD::FSQRT: return visitFSQRT(N);
1629 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N);
1630 case ISD::FPOW: return visitFPOW(N);
1631 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N);
1632 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N);
1633 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
1634 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
1635 case ISD::FP_ROUND: return visitFP_ROUND(N);
1636 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
1637 case ISD::FP_EXTEND: return visitFP_EXTEND(N);
1638 case ISD::FNEG: return visitFNEG(N);
1639 case ISD::FABS: return visitFABS(N);
1640 case ISD::FFLOOR: return visitFFLOOR(N);
1641 case ISD::FMINNUM: return visitFMINNUM(N);
1642 case ISD::FMAXNUM: return visitFMAXNUM(N);
1643 case ISD::FMINIMUM: return visitFMINIMUM(N);
1644 case ISD::FMAXIMUM: return visitFMAXIMUM(N);
1645 case ISD::FCEIL: return visitFCEIL(N);
1646 case ISD::FTRUNC: return visitFTRUNC(N);
1647 case ISD::BRCOND: return visitBRCOND(N);
1648 case ISD::BR_CC: return visitBR_CC(N);
1649 case ISD::LOAD: return visitLOAD(N);
1650 case ISD::STORE: return visitSTORE(N);
1651 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N);
1652 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1653 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N);
1654 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N);
1655 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N);
1656 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N);
1657 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N);
1658 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N);
1659 case ISD::MGATHER: return visitMGATHER(N);
1660 case ISD::MLOAD: return visitMLOAD(N);
1661 case ISD::MSCATTER: return visitMSCATTER(N);
1662 case ISD::MSTORE: return visitMSTORE(N);
1663 case ISD::LIFETIME_END: return visitLIFETIME_END(N);
1664 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
1665 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
1666 case ISD::VECREDUCE_FADD:
1667 case ISD::VECREDUCE_FMUL:
1668 case ISD::VECREDUCE_ADD:
1669 case ISD::VECREDUCE_MUL:
1670 case ISD::VECREDUCE_AND:
1671 case ISD::VECREDUCE_OR:
1672 case ISD::VECREDUCE_XOR:
1673 case ISD::VECREDUCE_SMAX:
1674 case ISD::VECREDUCE_SMIN:
1675 case ISD::VECREDUCE_UMAX:
1676 case ISD::VECREDUCE_UMIN:
1677 case ISD::VECREDUCE_FMAX:
1678 case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
1680 return SDValue();
1683 SDValue DAGCombiner::combine(SDNode *N) {
1684 SDValue RV = visit(N);
1686 // If nothing happened, try a target-specific DAG combine.
1687 if (!RV.getNode()) {
1688 assert(N->getOpcode() != ISD::DELETED_NODE &&
1689 "Node was deleted but visit returned NULL!");
1691 if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1692 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1694 // Expose the DAG combiner to the target combiner impls.
1695 TargetLowering::DAGCombinerInfo
1696 DagCombineInfo(DAG, Level, false, this);
1698 RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1702 // If still nothing happened, try promoting the operation.
1703 if (!RV.getNode()) {
1704 switch (N->getOpcode()) {
1705 default: break;
1706 case ISD::ADD:
1707 case ISD::SUB:
1708 case ISD::MUL:
1709 case ISD::AND:
1710 case ISD::OR:
1711 case ISD::XOR:
1712 RV = PromoteIntBinOp(SDValue(N, 0));
1713 break;
1714 case ISD::SHL:
1715 case ISD::SRA:
1716 case ISD::SRL:
1717 RV = PromoteIntShiftOp(SDValue(N, 0));
1718 break;
1719 case ISD::SIGN_EXTEND:
1720 case ISD::ZERO_EXTEND:
1721 case ISD::ANY_EXTEND:
1722 RV = PromoteExtend(SDValue(N, 0));
1723 break;
1724 case ISD::LOAD:
1725 if (PromoteLoad(SDValue(N, 0)))
1726 RV = SDValue(N, 0);
1727 break;
1731 // If N is a commutative binary node, try to eliminate it if the commuted
1732 // version is already present in the DAG.
1733 if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1734 N->getNumValues() == 1) {
1735 SDValue N0 = N->getOperand(0);
1736 SDValue N1 = N->getOperand(1);
1738 // Constant operands are canonicalized to RHS.
1739 if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1740 SDValue Ops[] = {N1, N0};
1741 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1742 N->getFlags());
1743 if (CSENode)
1744 return SDValue(CSENode, 0);
1748 return RV;
1751 /// Given a node, return its input chain if it has one; otherwise return a null
1752 /// SDValue.
1753 static SDValue getInputChainForNode(SDNode *N) {
1754 if (unsigned NumOps = N->getNumOperands()) {
1755 if (N->getOperand(0).getValueType() == MVT::Other)
1756 return N->getOperand(0);
1757 if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1758 return N->getOperand(NumOps-1);
1759 for (unsigned i = 1; i < NumOps-1; ++i)
1760 if (N->getOperand(i).getValueType() == MVT::Other)
1761 return N->getOperand(i);
1763 return SDValue();
1766 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1767 // If N has two operands, where one has an input chain equal to the other,
1768 // the 'other' chain is redundant.
1769 if (N->getNumOperands() == 2) {
1770 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1771 return N->getOperand(0);
1772 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1773 return N->getOperand(1);
1776 // Don't simplify token factors if optnone.
1777 if (OptLevel == CodeGenOpt::None)
1778 return SDValue();
1780 // If the sole user is a token factor, we should make sure we have a
1781 // chance to merge them together. This prevents TF chains from inhibiting
1782 // optimizations.
1783 if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1784 AddToWorklist(*(N->use_begin()));
1786 SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
1787 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
1788 SmallPtrSet<SDNode*, 16> SeenOps;
1789 bool Changed = false; // If we should replace this token factor.
1791 // Start out with this token factor.
1792 TFs.push_back(N);
1794 // Iterate through token factors. TFs grows when new token factors are
1795 // encountered. Limit the number of nodes to inline to avoid quadratic
1796 // compile times.
1797 for (unsigned i = 0; i < TFs.size() && Ops.size() <= 2048; ++i) {
1798 SDNode *TF = TFs[i];
1800 // Check each of the operands.
1801 for (const SDValue &Op : TF->op_values()) {
1802 switch (Op.getOpcode()) {
1803 case ISD::EntryToken:
1804 // Entry tokens don't need to be added to the list. They are
1805 // redundant.
1806 Changed = true;
1807 break;
1809 case ISD::TokenFactor:
1810 if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1811 // Queue up for processing.
1812 TFs.push_back(Op.getNode());
1813 // Clean up in case the token factor is removed.
1814 AddToWorklist(Op.getNode());
1815 Changed = true;
1816 break;
1818 LLVM_FALLTHROUGH;
1820 default:
1821 // Only add if it isn't already in the list.
1822 if (SeenOps.insert(Op.getNode()).second)
1823 Ops.push_back(Op);
1824 else
1825 Changed = true;
1826 break;
1831 // Remove nodes that are chained to another node in the list. Do so
1832 // by walking up chains breadth-first, stopping when we've seen
1833 // another operand. In general we must climb to the EntryNode, but we can exit
1834 // early if we find all remaining work is associated with just one operand as
1835 // no further pruning is possible.
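// For concreteness: in TokenFactor(Ld, St), where walking St's chain upward
// reaches Ld, the Ld operand is redundant and can be pruned, because
// ordering against St already transitively orders against Ld.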
1837 // List of nodes to search through and original Ops from which they originate.
1838 SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1839 SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1840 SmallPtrSet<SDNode *, 16> SeenChains;
1841 bool DidPruneOps = false;
1843 unsigned NumLeftToConsider = 0;
1844 for (const SDValue &Op : Ops) {
1845 Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1846 OpWorkCount.push_back(1);
1849 auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1850 // If this is an Op, we can remove the op from the list. Re-mark any
1851 // search associated with it as coming from the current OpNumber.
1852 if (SeenOps.count(Op) != 0) {
1853 Changed = true;
1854 DidPruneOps = true;
1855 unsigned OrigOpNumber = 0;
1856 while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1857 OrigOpNumber++;
1858 assert((OrigOpNumber != Ops.size()) &&
1859 "expected to find TokenFactor Operand");
1860 // Re-mark worklist from OrigOpNumber to OpNumber
1861 for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1862 if (Worklist[i].second == OrigOpNumber) {
1863 Worklist[i].second = OpNumber;
1866 OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1867 OpWorkCount[OrigOpNumber] = 0;
1868 NumLeftToConsider--;
1870 // Add if it's a new chain
1871 if (SeenChains.insert(Op).second) {
1872 OpWorkCount[OpNumber]++;
1873 Worklist.push_back(std::make_pair(Op, OpNumber));
1877 for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1878 // We need to consider at least 2 Ops to prune.
1879 if (NumLeftToConsider <= 1)
1880 break;
1881 auto CurNode = Worklist[i].first;
1882 auto CurOpNumber = Worklist[i].second;
1883 assert((OpWorkCount[CurOpNumber] > 0) &&
1884 "Node should not appear in worklist");
1885 switch (CurNode->getOpcode()) {
1886 case ISD::EntryToken:
1887 // Hitting EntryToken is the only way for the search to terminate without
1888 // hitting another operand's search. Prevent us from marking this operand
1889 // considered.
1891 NumLeftToConsider++;
1892 break;
1893 case ISD::TokenFactor:
1894 for (const SDValue &Op : CurNode->op_values())
1895 AddToWorklist(i, Op.getNode(), CurOpNumber);
1896 break;
1897 case ISD::LIFETIME_START:
1898 case ISD::LIFETIME_END:
1899 case ISD::CopyFromReg:
1900 case ISD::CopyToReg:
1901 AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1902 break;
1903 default:
1904 if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1905 AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1906 break;
1908 OpWorkCount[CurOpNumber]--;
1909 if (OpWorkCount[CurOpNumber] == 0)
1910 NumLeftToConsider--;
1913 // If we've changed things around then replace token factor.
1914 if (Changed) {
1915 SDValue Result;
1916 if (Ops.empty()) {
1917 // The entry token is the only possible outcome.
1918 Result = DAG.getEntryNode();
1919 } else {
1920 if (DidPruneOps) {
1921 SmallVector<SDValue, 8> PrunedOps;
1923 for (const SDValue &Op : Ops) {
1924 if (SeenChains.count(Op.getNode()) == 0)
1925 PrunedOps.push_back(Op);
1927 Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1928 } else {
1929 Result = DAG.getTokenFactor(SDLoc(N), Ops);
1932 return Result;
1934 return SDValue();
1937 /// MERGE_VALUES can always be eliminated.
1938 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1939 WorklistRemover DeadNodes(*this);
1940 // Replacing results may cause a different MERGE_VALUES to suddenly
1941 // be CSE'd with N, and carry its uses with it. Iterate until no
1942 // uses remain, to ensure that the node can be safely deleted.
1943 // First add the users of this node to the work list so that they
1944 // can be tried again once they have new operands.
1945 AddUsersToWorklist(N);
1946 do {
1947 // Do as a single replacement to avoid rewalking use lists.
1948 SmallVector<SDValue, 8> Ops;
1949 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1950 Ops.push_back(N->getOperand(i));
1951 DAG.ReplaceAllUsesWith(N, Ops.data());
1952 } while (!N->use_empty());
1953 deleteAndRecombine(N);
1954 return SDValue(N, 0); // Return N so it doesn't get rechecked!
1957 /// If \p N is a ConstantSDNode with isOpaque() == false, return it cast to a
1958 /// ConstantSDNode pointer; otherwise return nullptr.
1959 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1960 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1961 return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1964 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1965 assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
1967 // Don't do this unless the old select is going away. We want to eliminate the
1968 // binary operator, not replace a binop with a select.
1969 // TODO: Handle ISD::SELECT_CC.
1970 unsigned SelOpNo = 0;
1971 SDValue Sel = BO->getOperand(0);
1972 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1973 SelOpNo = 1;
1974 Sel = BO->getOperand(1);
1977 if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1978 return SDValue();
1980 SDValue CT = Sel.getOperand(1);
1981 if (!isConstantOrConstantVector(CT, true) &&
1982 !isConstantFPBuildVectorOrConstantFP(CT))
1983 return SDValue();
1985 SDValue CF = Sel.getOperand(2);
1986 if (!isConstantOrConstantVector(CF, true) &&
1987 !isConstantFPBuildVectorOrConstantFP(CF))
1988 return SDValue();
1990 // Bail out if any constants are opaque because we can't constant fold those.
1991 // The exception is "and" and "or" with either 0 or -1, in which case we can
1992 // propagate non-constant operands into the select. I.e.:
1993 // and (select Cond, 0, -1), X --> select Cond, 0, X
1994 // or X, (select Cond, -1, 0) --> select Cond, -1, X
1995 auto BinOpcode = BO->getOpcode();
1996 bool CanFoldNonConst =
1997 (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1998 (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1999 (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2001 SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2002 if (!CanFoldNonConst &&
2003 !isConstantOrConstantVector(CBO, true) &&
2004 !isConstantFPBuildVectorOrConstantFP(CBO))
2005 return SDValue();
2007 EVT VT = Sel.getValueType();
2009 // In the case of a shift, the value and the shift amount may have different
2010 // VTs. For instance, on x86 the shift amount is i8 regardless of the LHS
2011 // type. Bail out if we have swapped operands and the value types do not
2012 // match. NB: x86 is fine if the operands are not swapped, with the shift
2013 // amount VT being no bigger than the shifted value. TODO: it is possible to
2014 // check for a shift, correct the VTs, and still optimize on x86 if needed.
2015 if (SelOpNo && VT != CBO.getValueType())
2016 return SDValue();
2018 // We have a select-of-constants followed by a binary operator with a
2019 // constant. Eliminate the binop by pulling the constant math into the select.
2020 // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
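// For instance, add (select Cond, 0, 7), 4 --> select Cond, 4, 11, which
// removes the add entirely.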
2021 SDLoc DL(Sel);
2022 SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2023 : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2024 if (!CanFoldNonConst && !NewCT.isUndef() &&
2025 !isConstantOrConstantVector(NewCT, true) &&
2026 !isConstantFPBuildVectorOrConstantFP(NewCT))
2027 return SDValue();
2029 SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2030 : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2031 if (!CanFoldNonConst && !NewCF.isUndef() &&
2032 !isConstantOrConstantVector(NewCF, true) &&
2033 !isConstantFPBuildVectorOrConstantFP(NewCF))
2034 return SDValue();
2036 return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2039 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2040 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2041 "Expecting add or sub");
2043 // Match a constant operand and a zext operand for the math instruction:
2044 // add Z, C
2045 // sub C, Z
2046 bool IsAdd = N->getOpcode() == ISD::ADD;
2047 SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2048 SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2049 auto *CN = dyn_cast<ConstantSDNode>(C);
2050 if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2051 return SDValue();
2053 // Match the zext operand as a setcc of a boolean.
2054 if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2055 Z.getOperand(0).getValueType() != MVT::i1)
2056 return SDValue();
2058 // Match the compare as: setcc (X & 1), 0, eq.
2059 SDValue SetCC = Z.getOperand(0);
2060 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2061 if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2062 SetCC.getOperand(0).getOpcode() != ISD::AND ||
2063 !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2064 return SDValue();
2066 // We are adding/subtracting a constant and an inverted low bit. Turn that
2067 // into a subtract/add of the low bit with an incremented/decremented constant:
2068 // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2069 // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
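// Worked instance with C = 5:
//   add (zext i1 (seteq (X & 1), 0)), 5
// X even: 1 + 5 == 6 == 6 - 0; X odd: 0 + 5 == 5 == 6 - 1, matching
// sub 6, (zext (X & 1)).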
2070 EVT VT = C.getValueType();
2071 SDLoc DL(N);
2072 SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2073 SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2074 DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2075 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2078 /// Try to fold an add/sub with a constant operand, where the other operand is a
2079 /// shifted 'not' of the sign bit, into a shift and add with a different constant.
2080 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2081 assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2082 "Expecting add or sub");
2084 // We need a constant operand for the add/sub, and the other operand is a
2085 // logical shift right: add (srl), C or sub C, (srl).
2086 bool IsAdd = N->getOpcode() == ISD::ADD;
2087 SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2088 SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2089 ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2090 if (!C || ShiftOp.getOpcode() != ISD::SRL)
2091 return SDValue();
2093 // The shift must be of a 'not' value.
2094 SDValue Not = ShiftOp.getOperand(0);
2095 if (!Not.hasOneUse() || !isBitwiseNot(Not))
2096 return SDValue();
2098 // The shift must be moving the sign bit to the least-significant-bit.
2099 EVT VT = ShiftOp.getValueType();
2100 SDValue ShAmt = ShiftOp.getOperand(1);
2101 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2102 if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
2103 return SDValue();
2105 // Eliminate the 'not' by adjusting the shift and add/sub constant:
2106 // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2107 // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
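// Spot check on i32 with C = 5, i.e. add (srl (not X), 31), 5:
//   X < 0:  (~X >>u 31) == 0, giving 5; (X >>s 31) == -1, and -1 + 6 == 5
//   X >= 0: (~X >>u 31) == 1, giving 6; (X >>s 31) ==  0, and  0 + 6 == 6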
2108 SDLoc DL(N);
2109 auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2110 SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2111 APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2112 return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2115 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2116 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2117 /// are no common bits set in the operands).
2118 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2119 SDValue N0 = N->getOperand(0);
2120 SDValue N1 = N->getOperand(1);
2121 EVT VT = N0.getValueType();
2122 SDLoc DL(N);
2124 // fold vector ops
2125 if (VT.isVector()) {
2126 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2127 return FoldedVOp;
2129 // fold (add x, 0) -> x, vector edition
2130 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2131 return N0;
2132 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2133 return N1;
2136 // fold (add x, undef) -> undef
2137 if (N0.isUndef())
2138 return N0;
2140 if (N1.isUndef())
2141 return N1;
2143 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2144 // canonicalize constant to RHS
2145 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2146 return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2147 // fold (add c1, c2) -> c1+c2
2148 return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2149 N1.getNode());
2152 // fold (add x, 0) -> x
2153 if (isNullConstant(N1))
2154 return N0;
2156 if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2157 // fold ((c1-A)+c2) -> (c1+c2)-A
2158 if (N0.getOpcode() == ISD::SUB &&
2159 isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2160 // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
2161 return DAG.getNode(ISD::SUB, DL, VT,
2162 DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
2163 N0.getOperand(1));
2166 // add (sext i1 X), 1 -> zext (not i1 X)
2167 // We don't transform this pattern:
2168 // add (zext i1 X), -1 -> sext (not i1 X)
2169 // because most (?) targets generate better code for the zext form.
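// Both forms agree on the two i1 inputs:
//   X == 0: sext == 0,  and 0 + 1 == 1 == zext (not 0)
//   X == 1: sext == -1, and -1 + 1 == 0 == zext (not 1)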
2170 if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2171 isOneOrOneSplat(N1)) {
2172 SDValue X = N0.getOperand(0);
2173 if ((!LegalOperations ||
2174 (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2175 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2176 X.getScalarValueSizeInBits() == 1) {
2177 SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2178 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2182 // Undo the add -> or combine to merge constant offsets from a frame index.
2183 if (N0.getOpcode() == ISD::OR &&
2184 isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2185 isa<ConstantSDNode>(N0.getOperand(1)) &&
2186 DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2187 SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2188 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2192 if (SDValue NewSel = foldBinOpIntoSelect(N))
2193 return NewSel;
2195 // reassociate add
2196 if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2197 return RADD;
2199 // fold ((0-A) + B) -> B-A
2200 if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2201 return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2203 // fold (A + (0-B)) -> A-B
2204 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2205 return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2207 // fold (A+(B-A)) -> B
2208 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2209 return N1.getOperand(0);
2211 // fold ((B-A)+A) -> B
2212 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2213 return N0.getOperand(0);
2215 // fold ((A-B)+(C-A)) -> (C-B)
2216 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2217 N0.getOperand(0) == N1.getOperand(1))
2218 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2219 N0.getOperand(1));
2221 // fold ((A-B)+(B-C)) -> (A-C)
2222 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2223 N0.getOperand(1) == N1.getOperand(0))
2224 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2225 N1.getOperand(1));
2227 // fold (A+(B-(A+C))) to (B-C)
2228 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2229 N0 == N1.getOperand(1).getOperand(0))
2230 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2231 N1.getOperand(1).getOperand(1));
2233 // fold (A+(B-(C+A))) to (B-C)
2234 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2235 N0 == N1.getOperand(1).getOperand(1))
2236 return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2237 N1.getOperand(1).getOperand(0));
2239 // fold (A+((B-A)+or-C)) to (B+or-C)
2240 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2241 N1.getOperand(0).getOpcode() == ISD::SUB &&
2242 N0 == N1.getOperand(0).getOperand(1))
2243 return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2244 N1.getOperand(1));
2246 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2247 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2248 SDValue N00 = N0.getOperand(0);
2249 SDValue N01 = N0.getOperand(1);
2250 SDValue N10 = N1.getOperand(0);
2251 SDValue N11 = N1.getOperand(1);
2253 if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2254 return DAG.getNode(ISD::SUB, DL, VT,
2255 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2256 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2259 // fold (add (umax X, C), -C) --> (usubsat X, C)
2260 if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2261 auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2262 return (!Max && !Op) ||
2263 (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2265 if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2266 /*AllowUndefs*/ true))
2267 return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2268 N0.getOperand(1));
2271 if (SimplifyDemandedBits(SDValue(N, 0)))
2272 return SDValue(N, 0);
2274 if (isOneOrOneSplat(N1)) {
2275 // fold (add (xor a, -1), 1) -> (sub 0, a)
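// (This is the two's-complement identity ~a + 1 == -a.)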
2276 if (isBitwiseNot(N0))
2277 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2278 N0.getOperand(0));
2280 // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2281 if (N0.getOpcode() == ISD::ADD ||
2282 N0.getOpcode() == ISD::UADDO ||
2283 N0.getOpcode() == ISD::SADDO) {
2284 SDValue A, Xor;
2286 if (isBitwiseNot(N0.getOperand(0))) {
2287 A = N0.getOperand(1);
2288 Xor = N0.getOperand(0);
2289 } else if (isBitwiseNot(N0.getOperand(1))) {
2290 A = N0.getOperand(0);
2291 Xor = N0.getOperand(1);
2294 if (Xor)
2295 return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2299 if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2300 return Combined;
2302 if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2303 return Combined;
2305 return SDValue();
2308 SDValue DAGCombiner::visitADD(SDNode *N) {
2309 SDValue N0 = N->getOperand(0);
2310 SDValue N1 = N->getOperand(1);
2311 EVT VT = N0.getValueType();
2312 SDLoc DL(N);
2314 if (SDValue Combined = visitADDLike(N))
2315 return Combined;
2317 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2318 return V;
2320 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2321 return V;
2323 // fold (a+b) -> (a|b) iff a and b share no bits.
2324 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2325 DAG.haveNoCommonBitsSet(N0, N1))
2326 return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2328 return SDValue();
2331 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2332 unsigned Opcode = N->getOpcode();
2333 SDValue N0 = N->getOperand(0);
2334 SDValue N1 = N->getOperand(1);
2335 EVT VT = N0.getValueType();
2336 SDLoc DL(N);
2338 // fold vector ops
2339 if (VT.isVector()) {
2340 // TODO SimplifyVBinOp
2342 // fold (add_sat x, 0) -> x, vector edition
2343 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2344 return N0;
2345 if (ISD::isBuildVectorAllZeros(N0.getNode()))
2346 return N1;
2349 // fold (add_sat x, undef) -> -1
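// (undef may always be chosen so that the result is all-ones, e.g. UINT_MAX
// for uaddsat or -1 - x for saddsat, so folding to -1 is legal.)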
2350 if (N0.isUndef() || N1.isUndef())
2351 return DAG.getAllOnesConstant(DL, VT);
2353 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2354 // canonicalize constant to RHS
2355 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2356 return DAG.getNode(Opcode, DL, VT, N1, N0);
2357 // fold (add_sat c1, c2) -> c3
2358 return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2359 N1.getNode());
2362 // fold (add_sat x, 0) -> x
2363 if (isNullConstant(N1))
2364 return N0;
2366 // If it cannot overflow, transform into an add.
2367 if (Opcode == ISD::UADDSAT)
2368 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2369 return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2371 return SDValue();
2374 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2375 bool Masked = false;
2377 // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2378 while (true) {
2379 if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2380 V = V.getOperand(0);
2381 continue;
2384 if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2385 Masked = true;
2386 V = V.getOperand(0);
2387 continue;
2390 break;
2393 // If this is not a carry, return.
2394 if (V.getResNo() != 1)
2395 return SDValue();
2397 if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2398 V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2399 return SDValue();
2401 EVT VT = V.getNode()->getValueType(0);
2402 if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2403 return SDValue();
2405 // If the result is masked, then no matter what kind of bool it is we can
2406 // return. If it isn't, then we need to make sure the bool type is either 0 or
2407 // 1 and not other values.
2408 if (Masked ||
2409 TLI.getBooleanContents(V.getValueType()) ==
2410 TargetLoweringBase::ZeroOrOneBooleanContent)
2411 return V;
2413 return SDValue();
2416 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2417 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2418 /// the opcode and bypass the mask operation.
2419 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2420 SelectionDAG &DAG, const SDLoc &DL) {
2421 if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2422 return SDValue();
2424 EVT VT = N0.getValueType();
2425 if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2426 return SDValue();
2428 // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2429 // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
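// Validity: the sign-bit guard above means X is all sign bits, i.e. 0 or -1,
// so (X & 1) is 0 or 1 and N0 + (X & 1) == N0 - X in both cases.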
2430 return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2433 /// Helper for doing combines based on N0 and N1 being added to each other.
2434 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2435 SDNode *LocReference) {
2436 EVT VT = N0.getValueType();
2437 SDLoc DL(LocReference);
2439 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2440 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2441 isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2442 return DAG.getNode(ISD::SUB, DL, VT, N0,
2443 DAG.getNode(ISD::SHL, DL, VT,
2444 N1.getOperand(0).getOperand(1),
2445 N1.getOperand(1)));
2447 if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2448 return V;
2450 // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2451 // rather than 'add 0/-1' (the zext should get folded).
2452 // add (sext i1 Y), X --> sub X, (zext i1 Y)
2453 if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2454 N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2455 TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2456 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2457 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2460 // add X, (sextinreg Y i1) -> sub X, (and Y 1)
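// (sext_inreg i1 Y is 0 or -1, i.e. -(Y & 1), so X + sext_inreg(Y)
// equals X - (Y & 1).)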
2461 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2462 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2463 if (TN->getVT() == MVT::i1) {
2464 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2465 DAG.getConstant(1, DL, VT));
2466 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2470 // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2471 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2472 N1.getResNo() == 0)
2473 return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2474 N0, N1.getOperand(0), N1.getOperand(2));
2476 // (add X, Carry) -> (addcarry X, 0, Carry)
2477 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2478 if (SDValue Carry = getAsCarry(TLI, N1))
2479 return DAG.getNode(ISD::ADDCARRY, DL,
2480 DAG.getVTList(VT, Carry.getValueType()), N0,
2481 DAG.getConstant(0, DL, VT), Carry);
2483 return SDValue();
2486 SDValue DAGCombiner::visitADDC(SDNode *N) {
2487 SDValue N0 = N->getOperand(0);
2488 SDValue N1 = N->getOperand(1);
2489 EVT VT = N0.getValueType();
2490 SDLoc DL(N);
2492 // If the flag result is dead, turn this into an ADD.
2493 if (!N->hasAnyUseOfValue(1))
2494 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2495 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2497 // canonicalize constant to RHS.
2498 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2499 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2500 if (N0C && !N1C)
2501 return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2503 // fold (addc x, 0) -> x + no carry out
2504 if (isNullConstant(N1))
2505 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2506 DL, MVT::Glue));
2508 // If it cannot overflow, transform into an add.
2509 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2510 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2511 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2513 return SDValue();
2516 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2517 SelectionDAG &DAG, const TargetLowering &TLI) {
2518 EVT VT = V.getValueType();
2520 SDValue Cst;
2521 switch (TLI.getBooleanContents(VT)) {
2522 case TargetLowering::ZeroOrOneBooleanContent:
2523 case TargetLowering::UndefinedBooleanContent:
2524 Cst = DAG.getConstant(1, DL, VT);
2525 break;
2526 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2527 Cst = DAG.getConstant(-1, DL, VT);
2528 break;
2531 return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2534 static SDValue extractBooleanFlip(SDValue V, const TargetLowering &TLI) {
2535 if (V.getOpcode() != ISD::XOR)
2536 return SDValue();
2538 ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2539 if (!Const)
2540 return SDValue();
2542 EVT VT = V.getValueType();
2544 bool IsFlip = false;
2545 switch(TLI.getBooleanContents(VT)) {
2546 case TargetLowering::ZeroOrOneBooleanContent:
2547 IsFlip = Const->isOne();
2548 break;
2549 case TargetLowering::ZeroOrNegativeOneBooleanContent:
2550 IsFlip = Const->isAllOnesValue();
2551 break;
2552 case TargetLowering::UndefinedBooleanContent:
2553 IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2554 break;
2557 if (IsFlip)
2558 return V.getOperand(0);
2559 return SDValue();
2562 SDValue DAGCombiner::visitADDO(SDNode *N) {
2563 SDValue N0 = N->getOperand(0);
2564 SDValue N1 = N->getOperand(1);
2565 EVT VT = N0.getValueType();
2566 bool IsSigned = (ISD::SADDO == N->getOpcode());
2568 EVT CarryVT = N->getValueType(1);
2569 SDLoc DL(N);
2571 // If the flag result is dead, turn this into an ADD.
2572 if (!N->hasAnyUseOfValue(1))
2573 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2574 DAG.getUNDEF(CarryVT));
2576 // canonicalize constant to RHS.
2577 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2578 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2579 return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2581 // fold (addo x, 0) -> x + no carry out
2582 if (isNullOrNullSplat(N1))
2583 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2585 if (!IsSigned) {
2586 // If it cannot overflow, transform into an add.
2587 if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2588 return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2589 DAG.getConstant(0, DL, CarryVT));
2591 // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2592 if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2593 SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2594 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2595 return CombineTo(N, Sub,
2596 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2599 if (SDValue Combined = visitUADDOLike(N0, N1, N))
2600 return Combined;
2602 if (SDValue Combined = visitUADDOLike(N1, N0, N))
2603 return Combined;
2606 return SDValue();
2609 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2610 EVT VT = N0.getValueType();
2611 if (VT.isVector())
2612 return SDValue();
2614 // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2615 // If Y + 1 cannot overflow.
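// Since Y + Carry then cannot wrap, the inner addcarry produces no
// carry-out, and the one-step X + Y + Carry yields the same sum and the
// same carry-out as the two-step form.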
2616 if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2617 SDValue Y = N1.getOperand(0);
2618 SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2619 if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2620 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2621 N1.getOperand(2));
2624 // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2625 if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2626 if (SDValue Carry = getAsCarry(TLI, N1))
2627 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2628 DAG.getConstant(0, SDLoc(N), VT), Carry);
2630 return SDValue();
2633 SDValue DAGCombiner::visitADDE(SDNode *N) {
2634 SDValue N0 = N->getOperand(0);
2635 SDValue N1 = N->getOperand(1);
2636 SDValue CarryIn = N->getOperand(2);
2638 // canonicalize constant to RHS
2639 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2640 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2641 if (N0C && !N1C)
2642 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2643 N1, N0, CarryIn);
2645 // fold (adde x, y, false) -> (addc x, y)
2646 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2647 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2649 return SDValue();
2652 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2653 SDValue N0 = N->getOperand(0);
2654 SDValue N1 = N->getOperand(1);
2655 SDValue CarryIn = N->getOperand(2);
2656 SDLoc DL(N);
2658 // canonicalize constant to RHS
2659 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2660 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2661 if (N0C && !N1C)
2662 return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2664 // fold (addcarry x, y, false) -> (uaddo x, y)
2665 if (isNullConstant(CarryIn)) {
2666 if (!LegalOperations ||
2667 TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2668 return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2671 EVT CarryVT = CarryIn.getValueType();
2673 // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2674 if (isNullConstant(N0) && isNullConstant(N1)) {
2675 EVT VT = N0.getValueType();
2676 SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2677 AddToWorklist(CarryExt.getNode());
2678 return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2679 DAG.getConstant(1, DL, VT)),
2680 DAG.getConstant(0, DL, CarryVT));
2683 // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
2684 if (isBitwiseNot(N0) && isNullConstant(N1)) {
2685 if (SDValue B = extractBooleanFlip(CarryIn, TLI)) {
2686 SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
2687 DAG.getConstant(0, DL, N0.getValueType()),
2688 N0.getOperand(0), B);
2689 return CombineTo(N, Sub,
2690 flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2694 if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2695 return Combined;
2697 if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2698 return Combined;
2700 return SDValue();
2703 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2704 SDNode *N) {
2705 // Iff the flag result is dead:
2706 // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2707 if ((N0.getOpcode() == ISD::ADD ||
2708 (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
2709 isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2710 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2711 N0.getOperand(0), N0.getOperand(1), CarryIn);
2713 /**
2714 * When one of the addcarry arguments is itself a carry, we may be facing
2715 * a diamond carry propagation, in which case we try to transform the DAG
2716 * to ensure linear carry propagation if that is possible.
2718 * We are trying to get:
2719 * (addcarry X, 0, (addcarry A, B, Z):Carry)
2720 */
2721 if (auto Y = getAsCarry(TLI, N1)) {
2722 /**
2723 * (uaddo A, B)
2724 * / \
2725 * Carry Sum
2726 * | \
2727 * | (addcarry *, 0, Z)
2728 * | /
2729 * \ Carry
2730 * | /
2731 * (addcarry X, *, *)
2732 */
2733 if (Y.getOpcode() == ISD::UADDO &&
2734 CarryIn.getResNo() == 1 &&
2735 CarryIn.getOpcode() == ISD::ADDCARRY &&
2736 isNullConstant(CarryIn.getOperand(1)) &&
2737 CarryIn.getOperand(0) == Y.getValue(0)) {
2738 auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
2739 Y.getOperand(0), Y.getOperand(1),
2740 CarryIn.getOperand(2));
2741 AddToWorklist(NewY.getNode());
2742 return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2743 DAG.getConstant(0, SDLoc(N), N0.getValueType()),
2744 NewY.getValue(1));
2748 return SDValue();
2751 // Since it may not be valid to emit a fold to zero for vector initializers,
2752 // check if we can before folding.
2753 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2754 SelectionDAG &DAG, bool LegalOperations) {
2755 if (!VT.isVector())
2756 return DAG.getConstant(0, DL, VT);
2757 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2758 return DAG.getConstant(0, DL, VT);
2759 return SDValue();
2762 SDValue DAGCombiner::visitSUB(SDNode *N) {
2763 SDValue N0 = N->getOperand(0);
2764 SDValue N1 = N->getOperand(1);
2765 EVT VT = N0.getValueType();
2766 SDLoc DL(N);
2768 // fold vector ops
2769 if (VT.isVector()) {
2770 if (SDValue FoldedVOp = SimplifyVBinOp(N))
2771 return FoldedVOp;
2773 // fold (sub x, 0) -> x, vector edition
2774 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2775 return N0;
2778 // fold (sub x, x) -> 0
2779 // FIXME: Refactor this and xor and other similar operations together.
2780 if (N0 == N1)
2781 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2782 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2783 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2784 // fold (sub c1, c2) -> c1-c2
2785 return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2786 N1.getNode());
2789 if (SDValue NewSel = foldBinOpIntoSelect(N))
2790 return NewSel;
2792 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2794 // fold (sub x, c) -> (add x, -c)
2795 if (N1C) {
2796 return DAG.getNode(ISD::ADD, DL, VT, N0,
2797 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2800 if (isNullOrNullSplat(N0)) {
2801 unsigned BitWidth = VT.getScalarSizeInBits();
2802 // Right-shifting everything out but the sign bit followed by negation is
2803 // the same as flipping arithmetic/logical shift type without the negation:
2804 // -(X >>u 31) -> (X >>s 31)
2805 // -(X >>s 31) -> (X >>u 31)
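// Spot check on i32: for X < 0, 0 - (X >>u 31) == -1 == X >>s 31 and
// 0 - (X >>s 31) == 1 == X >>u 31; for X >= 0 both sides are 0.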
2806 if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2807 ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2808 if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
2809 auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2810 if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2811 return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2815 // 0 - X --> 0 if the sub is NUW.
2816 if (N->getFlags().hasNoUnsignedWrap())
2817 return N0;
2819 if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2820 // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2821 // N1 must be 0 because negating the minimum signed value is undefined.
2822 if (N->getFlags().hasNoSignedWrap())
2823 return N0;
2825 // 0 - X --> X if X is 0 or the minimum signed value.
2826 return N1;
2830 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2831 if (isAllOnesOrAllOnesSplat(N0))
2832 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2834 // fold (A - (0-B)) -> A+B
2835 if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2836 return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2838 // fold A-(A-B) -> B
2839 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2840 return N1.getOperand(1);
2842 // fold (A+B)-A -> B
2843 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2844 return N0.getOperand(1);
2846 // fold (A+B)-B -> A
2847 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2848 return N0.getOperand(0);
2850 // fold C2-(A+C1) -> (C2-C1)-A
2851 if (N1.getOpcode() == ISD::ADD) {
2852 SDValue N11 = N1.getOperand(1);
2853 if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2854 isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2855 SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
2856 return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2860 // fold ((A+(B+or-C))-B) -> A+or-C
2861 if (N0.getOpcode() == ISD::ADD &&
2862 (N0.getOperand(1).getOpcode() == ISD::SUB ||
2863 N0.getOperand(1).getOpcode() == ISD::ADD) &&
2864 N0.getOperand(1).getOperand(0) == N1)
2865 return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2866 N0.getOperand(1).getOperand(1));
2868 // fold ((A+(C+B))-B) -> A+C
2869 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2870 N0.getOperand(1).getOperand(1) == N1)
2871 return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2872 N0.getOperand(1).getOperand(0));
2874 // fold ((A-(B-C))-C) -> A-B
2875 if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2876 N0.getOperand(1).getOperand(1) == N1)
2877 return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2878 N0.getOperand(1).getOperand(0));
2880 // fold (A-(B-C)) -> A+(C-B)
2881 if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
2882 return DAG.getNode(ISD::ADD, DL, VT, N0,
2883 DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
2884 N1.getOperand(0)));
2886 // fold (X - (-Y * Z)) -> (X + (Y * Z))
2887 if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
2888 if (N1.getOperand(0).getOpcode() == ISD::SUB &&
2889 isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
2890 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2891 N1.getOperand(0).getOperand(1),
2892 N1.getOperand(1));
2893 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2895 if (N1.getOperand(1).getOpcode() == ISD::SUB &&
2896 isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
2897 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
2898 N1.getOperand(0),
2899 N1.getOperand(1).getOperand(1));
2900 return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
2904 // If either operand of a sub is undef, the result is undef
2905 if (N0.isUndef())
2906 return N0;
2907 if (N1.isUndef())
2908 return N1;
2910 if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2911 return V;
2913 if (SDValue V = foldAddSubOfSignBit(N, DAG))
2914 return V;
2916 if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
2917 return V;
2919 // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
2920 // rather than 'sub 0/1' (the sext should get folded).
2921 // sub X, (zext i1 Y) --> add X, (sext i1 Y)
2922 if (N1.getOpcode() == ISD::ZERO_EXTEND &&
2923 N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
2924 TLI.getBooleanContents(VT) ==
2925 TargetLowering::ZeroOrNegativeOneBooleanContent) {
2926 SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
2927 return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
2930 // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
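// This is the classic branchless abs: Y == 0 for non-negative X and
// Y == -1 for negative X, so (X ^ Y) - Y is X when Y == 0 and
// ~X + 1 == -X when Y == -1.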
2931 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
2932 if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
2933 SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
2934 SDValue S0 = N1.getOperand(0);
2935 if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
2936 unsigned OpSizeInBits = VT.getScalarSizeInBits();
2937 if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
2938 if (C->getAPIntValue() == (OpSizeInBits - 1))
2939 return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
2944 // If the relocation model supports it, consider symbol offsets.
2945 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
2946 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
2947 // fold (sub Sym, c) -> Sym-c
2948 if (N1C && GA->getOpcode() == ISD::GlobalAddress)
2949 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
2950 GA->getOffset() -
2951 (uint64_t)N1C->getSExtValue());
2952 // fold (sub Sym+c1, Sym+c2) -> c1-c2
2953 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
2954 if (GA->getGlobal() == GB->getGlobal())
2955 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
2956 DL, VT);
2959 // sub X, (sextinreg Y i1) -> add X, (and Y 1)
2960 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2961 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2962 if (TN->getVT() == MVT::i1) {
2963 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2964 DAG.getConstant(1, DL, VT));
2965 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
2969 // Prefer an add for more folding potential and possibly better codegen:
2970 // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
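// (N10 >>u (w-1)) is 0 or 1 while (N10 >>s (w-1)) is 0 or -1 for the same
// input, so subtracting the former is the same as adding the latter.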
2971 if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
2972 SDValue ShAmt = N1.getOperand(1);
2973 ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2974 if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
2975 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
2976 return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
2980 return SDValue();
2983 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
2984 SDValue N0 = N->getOperand(0);
2985 SDValue N1 = N->getOperand(1);
2986 EVT VT = N0.getValueType();
2987 SDLoc DL(N);
2989 // fold vector ops
2990 if (VT.isVector()) {
2991 // TODO SimplifyVBinOp
2993 // fold (sub_sat x, 0) -> x, vector edition
2994 if (ISD::isBuildVectorAllZeros(N1.getNode()))
2995 return N0;
2998 // fold (sub_sat x, undef) -> 0
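// (undef may be chosen equal to the other operand, making the difference 0.)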
2999 if (N0.isUndef() || N1.isUndef())
3000 return DAG.getConstant(0, DL, VT);
3002 // fold (sub_sat x, x) -> 0
3003 if (N0 == N1)
3004 return DAG.getConstant(0, DL, VT);
3006 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3007 DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3008 // fold (sub_sat c1, c2) -> c3
3009 return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3010 N1.getNode());
3013 // fold (sub_sat x, 0) -> x
3014 if (isNullConstant(N1))
3015 return N0;
3017 return SDValue();
3020 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3021 SDValue N0 = N->getOperand(0);
3022 SDValue N1 = N->getOperand(1);
3023 EVT VT = N0.getValueType();
3024 SDLoc DL(N);
3026 // If the flag result is dead, turn this into a SUB.
3027 if (!N->hasAnyUseOfValue(1))
3028 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3029 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3031 // fold (subc x, x) -> 0 + no borrow
3032 if (N0 == N1)
3033 return CombineTo(N, DAG.getConstant(0, DL, VT),
3034 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3036 // fold (subc x, 0) -> x + no borrow
3037 if (isNullConstant(N1))
3038 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3040 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3041 if (isAllOnesConstant(N0))
3042 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3043 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3045 return SDValue();
3048 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3049 SDValue N0 = N->getOperand(0);
3050 SDValue N1 = N->getOperand(1);
3051 EVT VT = N0.getValueType();
3052 bool IsSigned = (ISD::SSUBO == N->getOpcode());
3054 EVT CarryVT = N->getValueType(1);
3055 SDLoc DL(N);
3057 // If the flag result is dead, turn this into a SUB.
3058 if (!N->hasAnyUseOfValue(1))
3059 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3060 DAG.getUNDEF(CarryVT));
3062 // fold (subo x, x) -> 0 + no borrow
3063 if (N0 == N1)
3064 return CombineTo(N, DAG.getConstant(0, DL, VT),
3065 DAG.getConstant(0, DL, CarryVT));
3067 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3069 // fold (subo x, c) -> (addo x, -c)
3070 if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3071 return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3072 DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3075 // fold (subo x, 0) -> x + no borrow
3076 if (isNullOrNullSplat(N1))
3077 return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3079 // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3080 if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3081 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3082 DAG.getConstant(0, DL, CarryVT));
3084 return SDValue();
3087 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3088 SDValue N0 = N->getOperand(0);
3089 SDValue N1 = N->getOperand(1);
3090 SDValue CarryIn = N->getOperand(2);
3092 // fold (sube x, y, false) -> (subc x, y)
3093 if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3094 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3096 return SDValue();
3099 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3100 SDValue N0 = N->getOperand(0);
3101 SDValue N1 = N->getOperand(1);
3102 SDValue CarryIn = N->getOperand(2);
3104 // fold (subcarry x, y, false) -> (usubo x, y)
3105 if (isNullConstant(CarryIn)) {
3106 if (!LegalOperations ||
3107 TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3108 return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3111 return SDValue();
3114 SDValue DAGCombiner::visitMUL(SDNode *N) {
3115 SDValue N0 = N->getOperand(0);
3116 SDValue N1 = N->getOperand(1);
3117 EVT VT = N0.getValueType();
3119 // fold (mul x, undef) -> 0
3120 if (N0.isUndef() || N1.isUndef())
3121 return DAG.getConstant(0, SDLoc(N), VT);
3123 bool N0IsConst = false;
3124 bool N1IsConst = false;
3125 bool N1IsOpaqueConst = false;
3126 bool N0IsOpaqueConst = false;
3127 APInt ConstValue0, ConstValue1;
3128 // fold vector ops
3129 if (VT.isVector()) {
3130 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3131 return FoldedVOp;
3133 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3134 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3135 assert((!N0IsConst ||
3136 ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3137 "Splat APInt should be element width");
3138 assert((!N1IsConst ||
3139 ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3140 "Splat APInt should be element width");
3141 } else {
3142 N0IsConst = isa<ConstantSDNode>(N0);
3143 if (N0IsConst) {
3144 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3145 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3147 N1IsConst = isa<ConstantSDNode>(N1);
3148 if (N1IsConst) {
3149 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3150 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3154 // fold (mul c1, c2) -> c1*c2
3155 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3156 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3157 N0.getNode(), N1.getNode());
3159 // canonicalize constant to RHS (vector doesn't have to splat)
3160 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3161 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3162 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3163 // fold (mul x, 0) -> 0
3164 if (N1IsConst && ConstValue1.isNullValue())
3165 return N1;
3166 // fold (mul x, 1) -> x
3167 if (N1IsConst && ConstValue1.isOneValue())
3168 return N0;
3170 if (SDValue NewSel = foldBinOpIntoSelect(N))
3171 return NewSel;
3173 // fold (mul x, -1) -> 0-x
3174 if (N1IsConst && ConstValue1.isAllOnesValue()) {
3175 SDLoc DL(N);
3176 return DAG.getNode(ISD::SUB, DL, VT,
3177 DAG.getConstant(0, DL, VT), N0);
3179 // fold (mul x, (1 << c)) -> x << c
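// e.g. (mul x, 16) --> (shl x, 4)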
3180 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3181 DAG.isKnownToBeAPowerOfTwo(N1) &&
3182 (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3183 SDLoc DL(N);
3184 SDValue LogBase2 = BuildLogBase2(N1, DL);
3185 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3186 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3187 return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3189 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3190 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3191 unsigned Log2Val = (-ConstValue1).logBase2();
3192 SDLoc DL(N);
3193 // FIXME: If the input is something that is easily negated (e.g. a
3194 // single-use add), we should put the negate there.
3195 return DAG.getNode(ISD::SUB, DL, VT,
3196 DAG.getConstant(0, DL, VT),
3197 DAG.getNode(ISD::SHL, DL, VT, N0,
3198 DAG.getConstant(Log2Val, DL,
3199 getShiftAmountTy(N0.getValueType()))));
3202 // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3203 // mul x, (2^N + 1) --> add (shl x, N), x
3204 // mul x, (2^N - 1) --> sub (shl x, N), x
3205 // Examples: x * 33 --> (x << 5) + x
3206 // x * 15 --> (x << 4) - x
3207 // x * -33 --> -((x << 5) + x)
3208 // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3209 if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
3210 // TODO: We could handle more general decomposition of any constant by
3211 // having the target set a limit on number of ops and making a
3212 // callback to determine that sequence (similar to sqrt expansion).
3213 unsigned MathOp = ISD::DELETED_NODE;
3214 APInt MulC = ConstValue1.abs();
3215 if ((MulC - 1).isPowerOf2())
3216 MathOp = ISD::ADD;
3217 else if ((MulC + 1).isPowerOf2())
3218 MathOp = ISD::SUB;
3220 if (MathOp != ISD::DELETED_NODE) {
3221 unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
3222 : (MulC + 1).logBase2();
3223 assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
3224 "Not expecting multiply-by-constant that could have simplified");
3225 SDLoc DL(N);
3226 SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
3227 DAG.getConstant(ShAmt, DL, VT));
3228 SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3229 if (ConstValue1.isNegative())
3230 R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3231 return R;
3235 // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3236 if (N0.getOpcode() == ISD::SHL &&
3237 isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3238 isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3239 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3240 if (isConstantOrConstantVector(C3))
3241 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3244 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3245 // use.
3247 SDValue Sh(nullptr, 0), Y(nullptr, 0);
3249 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
3250 if (N0.getOpcode() == ISD::SHL &&
3251 isConstantOrConstantVector(N0.getOperand(1)) &&
3252 N0.getNode()->hasOneUse()) {
3253 Sh = N0; Y = N1;
3254 } else if (N1.getOpcode() == ISD::SHL &&
3255 isConstantOrConstantVector(N1.getOperand(1)) &&
3256 N1.getNode()->hasOneUse()) {
3257 Sh = N1; Y = N0;
3260 if (Sh.getNode()) {
3261 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3262 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3266 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3267 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3268 N0.getOpcode() == ISD::ADD &&
3269 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3270 isMulAddWithConstProfitable(N, N0, N1))
3271 return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3272 DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3273 N0.getOperand(0), N1),
3274 DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3275 N0.getOperand(1), N1));
3277 // reassociate mul
3278 if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3279 return RMUL;
3281 return SDValue();
3284 /// Return true if a divmod libcall is available for the node's type.
3285 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3286 const TargetLowering &TLI) {
3287 RTLIB::Libcall LC;
3288 EVT NodeType = Node->getValueType(0);
3289 if (!NodeType.isSimple())
3290 return false;
3291 switch (NodeType.getSimpleVT().SimpleTy) {
3292 default: return false; // No libcall for vector types.
3293 case MVT::i8: LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break;
3294 case MVT::i16: LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3295 case MVT::i32: LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3296 case MVT::i64: LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3297 case MVT::i128: LC = isSigned ? RTLIB::SDIVREM_I128 : RTLIB::UDIVREM_I128; break;
3300 return TLI.getLibcallName(LC) != nullptr;
3303 /// Issue divrem if both quotient and remainder are needed.
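/// For example, if a function computes both X / Y (SDIV) and X % Y (SREM) of
/// the same operands, both nodes can be replaced by a single SDIVREM.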
3304 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3305 if (Node->use_empty())
3306 return SDValue(); // This is a dead node, leave it alone.
3308 unsigned Opcode = Node->getOpcode();
3309 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3310 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3312 // DivRem libcalls can still work on non-legal types.
3313 EVT VT = Node->getValueType(0);
3314 if (VT.isVector() || !VT.isInteger())
3315 return SDValue();
3317 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3318 return SDValue();
3320 // If DIVREM is going to get expanded into a libcall,
3321 // but there is no libcall available, then don't combine.
3322 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3323 !isDivRemLibcallAvailable(Node, isSigned, TLI))
3324 return SDValue();
3326 // If div is legal, it's better to do the normal expansion
3327 unsigned OtherOpcode = 0;
3328 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3329 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3330 if (TLI.isOperationLegalOrCustom(Opcode, VT))
3331 return SDValue();
3332 } else {
3333 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3334 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3335 return SDValue();
3338 SDValue Op0 = Node->getOperand(0);
3339 SDValue Op1 = Node->getOperand(1);
3340 SDValue combined;
3341 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3342 UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3343 SDNode *User = *UI;
3344 if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3345 User->use_empty())
3346 continue;
3347 // Convert the other matching node(s), too;
3348 // otherwise, the DIVREM may get target-legalized into something
3349 // target-specific that we won't be able to recognize.
3350 unsigned UserOpc = User->getOpcode();
3351 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3352 User->getOperand(0) == Op0 &&
3353 User->getOperand(1) == Op1) {
3354 if (!combined) {
3355 if (UserOpc == OtherOpcode) {
3356 SDVTList VTs = DAG.getVTList(VT, VT);
3357 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3358 } else if (UserOpc == DivRemOpc) {
3359 combined = SDValue(User, 0);
3360 } else {
3361 assert(UserOpc == Opcode);
3362 continue;
3365 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3366 CombineTo(User, combined);
3367 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3368 CombineTo(User, combined.getValue(1));
3371 return combined;
3374 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3375 SDValue N0 = N->getOperand(0);
3376 SDValue N1 = N->getOperand(1);
3377 EVT VT = N->getValueType(0);
3378 SDLoc DL(N);
3380 unsigned Opc = N->getOpcode();
3381 bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3382 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3384 // X / undef -> undef
3385 // X % undef -> undef
3386 // X / 0 -> undef
3387 // X % 0 -> undef
3388 // NOTE: This includes vectors where any divisor element is zero/undef.
3389 if (DAG.isUndef(Opc, {N0, N1}))
3390 return DAG.getUNDEF(VT);
3392 // undef / X -> 0
3393 // undef % X -> 0
3394 if (N0.isUndef())
3395 return DAG.getConstant(0, DL, VT);
3397 // 0 / X -> 0
3398 // 0 % X -> 0
3399 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3400 if (N0C && N0C->isNullValue())
3401 return N0;
3403 // X / X -> 1
3404 // X % X -> 0
3405 if (N0 == N1)
3406 return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3408 // X / 1 -> X
3409 // X % 1 -> 0
3410 // If this is a boolean op (single-bit element type), we can't have
3411 // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3412 // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3413 // it's a 1.
3414 if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3415 return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3417 return SDValue();
3420 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3421 SDValue N0 = N->getOperand(0);
3422 SDValue N1 = N->getOperand(1);
3423 EVT VT = N->getValueType(0);
3424 EVT CCVT = getSetCCResultType(VT);
3426 // fold vector ops
3427 if (VT.isVector())
3428 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3429 return FoldedVOp;
3431 SDLoc DL(N);
3433 // fold (sdiv c1, c2) -> c1/c2
3434 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3435 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3436 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3437 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3438 // fold (sdiv X, -1) -> 0-X
3439 if (N1C && N1C->isAllOnesValue())
3440 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3441 // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3442 if (N1C && N1C->getAPIntValue().isMinSignedValue())
3443 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3444 DAG.getConstant(1, DL, VT),
3445 DAG.getConstant(0, DL, VT));
3447 if (SDValue V = simplifyDivRem(N, DAG))
3448 return V;
3450 if (SDValue NewSel = foldBinOpIntoSelect(N))
3451 return NewSel;
3453 // If we know the sign bits of both operands are zero, strength reduce to a
3454 // udiv instead. Handles (X & 15) /s 4 -> (X & 15) >> 2
3455 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3456 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3458 if (SDValue V = visitSDIVLike(N0, N1, N)) {
3459 // If the corresponding remainder node exists, update its users with
3460 // (Dividend - (Quotient * Divisor)).
3461 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3462 { N0, N1 })) {
3463 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3464 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3465 AddToWorklist(Mul.getNode());
3466 AddToWorklist(Sub.getNode());
3467 CombineTo(RemNode, Sub);
3469 return V;
3472 // sdiv, srem -> sdivrem
3473 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3474 // true. Otherwise, we break the simplification logic in visitREM().
3475 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3476 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3477 if (SDValue DivRem = useDivRem(N))
3478 return DivRem;
3480 return SDValue();
3483 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3484 SDLoc DL(N);
3485 EVT VT = N->getValueType(0);
3486 EVT CCVT = getSetCCResultType(VT);
3487 unsigned BitWidth = VT.getScalarSizeInBits();
3489 // Helper for determining whether a value is a power-of-2 constant scalar,
3490 // the negation of one, or a vector of such elements.
3491 auto IsPowerOfTwo = [](ConstantSDNode *C) {
3492 if (C->isNullValue() || C->isOpaque())
3493 return false;
3494 if (C->getAPIntValue().isPowerOf2())
3495 return true;
3496 if ((-C->getAPIntValue()).isPowerOf2())
3497 return true;
3498 return false;
3501 // fold (sdiv X, pow2) -> simple ops after legalize
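// e.g. for i32: (sdiv x, 8) --> (sra (add x, (srl (sra x, 31), 29)), 3),
// where the add rounds negative dividends toward zero before the shift.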
3502 // FIXME: We check for the exact bit here because the generic lowering gives
3503 // better results in that case. The target-specific lowering should learn how
3504 // to handle exact sdivs efficiently.
3505 if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3506 // Target-specific implementation of sdiv x, pow2.
3507 if (SDValue Res = BuildSDIVPow2(N))
3508 return Res;
3510 // Create constants that are functions of the shift amount value.
3511 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3512 SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3513 SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3514 C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3515 SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3516 if (!isConstantOrConstantVector(Inexact))
3517 return SDValue();
3519 // Splat the sign bit into the register
3520 SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3521 DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3522 AddToWorklist(Sign.getNode());
3524 // Add (N0 < 0) ? pow2 - 1 : 0 so the arithmetic shift below rounds toward zero.
3525 SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3526 AddToWorklist(Srl.getNode());
3527 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3528 AddToWorklist(Add.getNode());
3529 SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3530 AddToWorklist(Sra.getNode());
3532 // Special case: (sdiv X, 1) -> X
3533 // Special case: (sdiv X, -1) -> 0-X
3534 SDValue One = DAG.getConstant(1, DL, VT);
3535 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3536 SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3537 SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3538 SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3539 Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3541 // If dividing by a positive value, we're done. Otherwise, the result must
3542 // be negated.
3543 SDValue Zero = DAG.getConstant(0, DL, VT);
3544 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3546 // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3547 SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3548 SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3549 return Res;
3552 // If integer divide is expensive and we satisfy the requirements, emit an
3553 // alternate sequence. Targets may check function attributes for size/speed
3554 // trade-offs.
3555 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3556 if (isConstantOrConstantVector(N1) &&
3557 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3558 if (SDValue Op = BuildSDIV(N))
3559 return Op;
3561 return SDValue();
3564 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3565 SDValue N0 = N->getOperand(0);
3566 SDValue N1 = N->getOperand(1);
3567 EVT VT = N->getValueType(0);
3568 EVT CCVT = getSetCCResultType(VT);
3570 // fold vector ops
3571 if (VT.isVector())
3572 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3573 return FoldedVOp;
3575 SDLoc DL(N);
3577 // fold (udiv c1, c2) -> c1/c2
3578 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3579 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3580 if (N0C && N1C)
3581 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3582 N0C, N1C))
3583 return Folded;
3584 // fold (udiv X, -1) -> select(X == -1, 1, 0)
3585 if (N1C && N1C->getAPIntValue().isAllOnesValue())
3586 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3587 DAG.getConstant(1, DL, VT),
3588 DAG.getConstant(0, DL, VT));
3590 if (SDValue V = simplifyDivRem(N, DAG))
3591 return V;
3593 if (SDValue NewSel = foldBinOpIntoSelect(N))
3594 return NewSel;
3596 if (SDValue V = visitUDIVLike(N0, N1, N)) {
3597 // If the corresponding remainder node exists, update its users with
3598 // (Dividend - (Quotient * Divisor)).
3599 if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3600 { N0, N1 })) {
3601 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3602 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3603 AddToWorklist(Mul.getNode());
3604 AddToWorklist(Sub.getNode());
3605 CombineTo(RemNode, Sub);
3607 return V;
3610 // udiv, urem -> udivrem
3611 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3612 // true. Otherwise, we break the simplification logic in visitREM().
3613 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3614 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3615 if (SDValue DivRem = useDivRem(N))
3616 return DivRem;
3618 return SDValue();
3621 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3622 SDLoc DL(N);
3623 EVT VT = N->getValueType(0);
3625 // fold (udiv x, (1 << c)) -> x >>u c
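// e.g. (udiv x, 8) --> (srl x, 3)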
3626 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3627 DAG.isKnownToBeAPowerOfTwo(N1)) {
3628 SDValue LogBase2 = BuildLogBase2(N1, DL);
3629 AddToWorklist(LogBase2.getNode());
3631 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3632 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3633 AddToWorklist(Trunc.getNode());
3634 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3637 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
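// e.g. (udiv x, (shl 4, y)) --> (srl x, (add y, 2))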
3638 if (N1.getOpcode() == ISD::SHL) {
3639 SDValue N10 = N1.getOperand(0);
3640 if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3641 DAG.isKnownToBeAPowerOfTwo(N10)) {
3642 SDValue LogBase2 = BuildLogBase2(N10, DL);
3643 AddToWorklist(LogBase2.getNode());
3645 EVT ADDVT = N1.getOperand(1).getValueType();
3646 SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3647 AddToWorklist(Trunc.getNode());
3648 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3649 AddToWorklist(Add.getNode());
3650 return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3654 // fold (udiv x, c) -> alternate
3655 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3656 if (isConstantOrConstantVector(N1) &&
3657 !TLI.isIntDivCheap(N->getValueType(0), Attr))
3658 if (SDValue Op = BuildUDIV(N))
3659 return Op;
3661 return SDValue();
3664 // Handles ISD::SREM and ISD::UREM.
3665 SDValue DAGCombiner::visitREM(SDNode *N) {
3666 unsigned Opcode = N->getOpcode();
3667 SDValue N0 = N->getOperand(0);
3668 SDValue N1 = N->getOperand(1);
3669 EVT VT = N->getValueType(0);
3670 EVT CCVT = getSetCCResultType(VT);
3672 bool isSigned = (Opcode == ISD::SREM);
3673 SDLoc DL(N);
3675 // fold (rem c1, c2) -> c1%c2
3676 ConstantSDNode *N0C = isConstOrConstSplat(N0);
3677 ConstantSDNode *N1C = isConstOrConstSplat(N1);
3678 if (N0C && N1C)
3679 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3680 return Folded;
3681 // fold (urem X, -1) -> select(X == -1, 0, X)
3682 if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3683 return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3684 DAG.getConstant(0, DL, VT), N0);
3686 if (SDValue V = simplifyDivRem(N, DAG))
3687 return V;
3689 if (SDValue NewSel = foldBinOpIntoSelect(N))
3690 return NewSel;
3692 if (isSigned) {
3693 // If we know the sign bits of both operands are zero, strength reduce to a
3694 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X & 15
3695 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3696 return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3697 } else {
3698 SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3699 if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3700 // fold (urem x, pow2) -> (and x, pow2-1)
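// e.g. (urem x, 16) --> (and x, 15)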
3701 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3702 AddToWorklist(Add.getNode());
3703 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3705 if (N1.getOpcode() == ISD::SHL &&
3706 DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3707 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3708 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3709 AddToWorklist(Add.getNode());
3710 return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3714 AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3716 // If X/C can be simplified by the division-by-constant logic, lower
3717 // X%C to the equivalent of X-X/C*C.
3718 // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3719 // speculative DIV must not cause a DIVREM conversion. We guard against this
3720 // by skipping the simplification if isIntDivCheap(). When div is not cheap,
3721 // combine will not return a DIVREM. Regardless, checking cheapness here
3722 // makes sense since the simplification results in fatter code.
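// For example, X % 7 becomes X - (X / 7) * 7, where X / 7 is produced by the
// cheaper multiply-based expansion from visitSDIVLike/visitUDIVLike.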
3723 if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
3724 SDValue OptimizedDiv =
3725 isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3726 if (OptimizedDiv.getNode()) {
3727 // If the equivalent Div node also exists, update its users.
3728 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3729 if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3730 { N0, N1 }))
3731 CombineTo(DivNode, OptimizedDiv);
3732 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3733 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3734 AddToWorklist(OptimizedDiv.getNode());
3735 AddToWorklist(Mul.getNode());
3736 return Sub;
3740 // sdiv, srem -> sdivrem
3741 if (SDValue DivRem = useDivRem(N))
3742 return DivRem.getValue(1);
3744 return SDValue();
3747 SDValue DAGCombiner::visitMULHS(SDNode *N) {
3748 SDValue N0 = N->getOperand(0);
3749 SDValue N1 = N->getOperand(1);
3750 EVT VT = N->getValueType(0);
3751 SDLoc DL(N);
3753 if (VT.isVector()) {
3754 // fold (mulhs x, 0) -> 0
3755 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3756 return N1;
3757 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3758 return N0;
3761 // fold (mulhs x, 0) -> 0
3762 if (isNullConstant(N1))
3763 return N1;
3764 // fold (mulhs x, 1) -> (sra x, size(x)-1)
3765 if (isOneConstant(N1))
3766 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3767 DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
3768 getShiftAmountTy(N0.getValueType())));
3770 // fold (mulhs x, undef) -> 0
3771 if (N0.isUndef() || N1.isUndef())
3772 return DAG.getConstant(0, DL, VT);
3774 // If the type twice as wide is legal, transform the mulhs to a wider multiply
3775 // plus a shift.
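// e.g. (mulhs i16:x, i16:y) with a legal i32 MUL becomes
// (trunc (srl (mul (sext x), (sext y)), 16)).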
3776 if (VT.isSimple() && !VT.isVector()) {
3777 MVT Simple = VT.getSimpleVT();
3778 unsigned SimpleSize = Simple.getSizeInBits();
3779 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3780 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3781 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3782 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3783 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3784 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3785 DAG.getConstant(SimpleSize, DL,
3786 getShiftAmountTy(N1.getValueType())));
3787 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3791 return SDValue();
3794 SDValue DAGCombiner::visitMULHU(SDNode *N) {
3795 SDValue N0 = N->getOperand(0);
3796 SDValue N1 = N->getOperand(1);
3797 EVT VT = N->getValueType(0);
3798 SDLoc DL(N);
3800 if (VT.isVector()) {
3801 // fold (mulhu x, 0) -> 0
3802 if (ISD::isBuildVectorAllZeros(N1.getNode()))
3803 return N1;
3804 if (ISD::isBuildVectorAllZeros(N0.getNode()))
3805 return N0;
3808 // fold (mulhu x, 0) -> 0
3809 if (isNullConstant(N1))
3810 return N1;
3811 // fold (mulhu x, 1) -> 0
3812 if (isOneConstant(N1))
3813 return DAG.getConstant(0, DL, N0.getValueType());
3814 // fold (mulhu x, undef) -> 0
3815 if (N0.isUndef() || N1.isUndef())
3816 return DAG.getConstant(0, DL, VT);
3818 // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
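// e.g. for i32: (mulhu x, 16) --> (srl x, 28), since (x * 2^4) >> 32 == x >> 28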
3819 if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3820 DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
3821 unsigned NumEltBits = VT.getScalarSizeInBits();
3822 SDValue LogBase2 = BuildLogBase2(N1, DL);
3823 SDValue SRLAmt = DAG.getNode(
3824 ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
3825 EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3826 SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
3827 return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3830 // If the type twice as wide is legal, transform the mulhu to a wider multiply
3831 // plus a shift.
3832 if (VT.isSimple() && !VT.isVector()) {
3833 MVT Simple = VT.getSimpleVT();
3834 unsigned SimpleSize = Simple.getSizeInBits();
3835 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3836 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3837 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
3838 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
3839 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3840 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3841 DAG.getConstant(SimpleSize, DL,
3842 getShiftAmountTy(N1.getValueType())));
3843 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3847 return SDValue();
3850 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
3851 /// give the opcodes for the two computations that are being performed. Return
3852 /// true if a simplification was made.
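/// For example, if only the low result of an SMUL_LOHI is used, the node can
/// be replaced by a plain MUL of the same operands.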
3853 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
3854 unsigned HiOp) {
3855 // If the high half is not needed, just compute the low half.
3856 bool HiExists = N->hasAnyUseOfValue(1);
3857 if (!HiExists && (!LegalOperations ||
3858 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
3859 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3860 return CombineTo(N, Res, Res);
3863 // If the low half is not needed, just compute the high half.
3864 bool LoExists = N->hasAnyUseOfValue(0);
3865 if (!LoExists && (!LegalOperations ||
3866 TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
3867 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3868 return CombineTo(N, Res, Res);
3871 // If both halves are used, return as it is.
3872 if (LoExists && HiExists)
3873 return SDValue();
3875 // If the two computed results can be simplified separately, separate them.
3876 if (LoExists) {
3877 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
3878 AddToWorklist(Lo.getNode());
3879 SDValue LoOpt = combine(Lo.getNode());
3880 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
3881 (!LegalOperations ||
3882 TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
3883 return CombineTo(N, LoOpt, LoOpt);
3886 if (HiExists) {
3887 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
3888 AddToWorklist(Hi.getNode());
3889 SDValue HiOpt = combine(Hi.getNode());
3890 if (HiOpt.getNode() && HiOpt != Hi &&
3891 (!LegalOperations ||
3892 TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
3893 return CombineTo(N, HiOpt, HiOpt);
3896 return SDValue();
3899 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
3900 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
3901 return Res;
3903 EVT VT = N->getValueType(0);
3904 SDLoc DL(N);
3906 // If the type twice as wide is legal, transform this to a wider multiply
3907 // plus a shift.
3908 if (VT.isSimple() && !VT.isVector()) {
3909 MVT Simple = VT.getSimpleVT();
3910 unsigned SimpleSize = Simple.getSizeInBits();
3911 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3912 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3913 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
3914 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
3915 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3916 // Compute the high part by shifting down the wide product.
3917 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3918 DAG.getConstant(SimpleSize, DL,
3919 getShiftAmountTy(Lo.getValueType())));
3920 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3921 // Compute the low part by truncating the wide product.
3922 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3923 return CombineTo(N, Lo, Hi);
3927 return SDValue();
3930 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
3931 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
3932 return Res;
3934 EVT VT = N->getValueType(0);
3935 SDLoc DL(N);
3937 // If the type twice as wide is legal, transform this to a wider multiply
3938 // plus a shift.
3939 if (VT.isSimple() && !VT.isVector()) {
3940 MVT Simple = VT.getSimpleVT();
3941 unsigned SimpleSize = Simple.getSizeInBits();
3942 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3943 if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
3944 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
3945 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
3946 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
3947 // Compute the high part by shifting down the wide product.
3948 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
3949 DAG.getConstant(SimpleSize, DL,
3950 getShiftAmountTy(Lo.getValueType())));
3951 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
3952 // Compute the low part by truncating the wide product.
3953 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
3954 return CombineTo(N, Lo, Hi);
3958 return SDValue();
3961 SDValue DAGCombiner::visitMULO(SDNode *N) {
3962 bool IsSigned = (ISD::SMULO == N->getOpcode());
3964 // (mulo x, 2) -> (addo x, x)
3965 if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
3966 if (C2->getAPIntValue() == 2)
3967 return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
3968 N->getVTList(), N->getOperand(0), N->getOperand(0));
3970 return SDValue();
3973 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
3974 SDValue N0 = N->getOperand(0);
3975 SDValue N1 = N->getOperand(1);
3976 EVT VT = N0.getValueType();
3978 // fold vector ops
3979 if (VT.isVector())
3980 if (SDValue FoldedVOp = SimplifyVBinOp(N))
3981 return FoldedVOp;
3983 // fold operation with constant operands.
3984 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
3985 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3986 if (N0C && N1C)
3987 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
3989 // canonicalize constant to RHS
3990 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3991 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3992 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
3994 // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
3995 // Only do this if the current op isn't legal and the flipped is.
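// e.g. (smax x, y) --> (umax x, y) when both sign bits are known zero, since
// the signed and unsigned orderings agree on non-negative values.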
3996 unsigned Opcode = N->getOpcode();
3997 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3998 if (!TLI.isOperationLegal(Opcode, VT) &&
3999 (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4000 (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4001 unsigned AltOpcode;
4002 switch (Opcode) {
4003 case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4004 case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4005 case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4006 case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4007 default: llvm_unreachable("Unknown MINMAX opcode");
4009 if (TLI.isOperationLegal(AltOpcode, VT))
4010 return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4013 return SDValue();
4016 /// If this is a bitwise logic instruction and both operands have the same
4017 /// opcode, try to sink the other opcode after the logic instruction.
4018 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4019 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4020 EVT VT = N0.getValueType();
4021 unsigned LogicOpcode = N->getOpcode();
4022 unsigned HandOpcode = N0.getOpcode();
4023 assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4024 LogicOpcode == ISD::XOR) && "Expected logic opcode");
4025 assert(HandOpcode == N1.getOpcode() && "Bad input!");
4027 // Bail early if none of these transforms apply.
4028 if (N0.getNumOperands() == 0)
4029 return SDValue();
4031 // FIXME: We should check number of uses of the operands to not increase
4032 // the instruction count for all transforms.
4034 // Handle size-changing casts.
4035 SDValue X = N0.getOperand(0);
4036 SDValue Y = N1.getOperand(0);
4037 EVT XVT = X.getValueType();
4038 SDLoc DL(N);
4039 if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4040 HandOpcode == ISD::SIGN_EXTEND) {
4041 // If both operands have other uses, this transform would create extra
4042 // instructions without eliminating anything.
4043 if (!N0.hasOneUse() && !N1.hasOneUse())
4044 return SDValue();
4045 // We need matching integer source types.
4046 if (XVT != Y.getValueType())
4047 return SDValue();
4048 // Don't create an illegal op during or after legalization. Don't ever
4049 // create an unsupported vector op.
4050 if ((VT.isVector() || LegalOperations) &&
4051 !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4052 return SDValue();
4053 // Avoid infinite looping with PromoteIntBinOp.
4054 // TODO: Should we apply desirable/legal constraints to all opcodes?
4055 if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4056 !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4057 return SDValue();
4058 // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4059 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4060 return DAG.getNode(HandOpcode, DL, VT, Logic);
4063 // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4064 if (HandOpcode == ISD::TRUNCATE) {
4065 // If both operands have other uses, this transform would create extra
4066 // instructions without eliminating anything.
4067 if (!N0.hasOneUse() && !N1.hasOneUse())
4068 return SDValue();
4069 // We need matching source types.
4070 if (XVT != Y.getValueType())
4071 return SDValue();
4072 // Don't create an illegal op during or after legalization.
4073 if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4074 return SDValue();
4075 // Be extra careful sinking truncate. If it's free, there's no benefit in
4076 // widening a binop. Also, don't create a logic op on an illegal type.
4077 if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4078 return SDValue();
4079 if (!TLI.isTypeLegal(XVT))
4080 return SDValue();
4081 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4082 return DAG.getNode(HandOpcode, DL, VT, Logic);
4085 // For binops SHL/SRL/SRA/AND:
4086 // logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4087 if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4088 HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4089 N0.getOperand(1) == N1.getOperand(1)) {
4090 // If either operand has other uses, this transform is not an improvement.
4091 if (!N0.hasOneUse() || !N1.hasOneUse())
4092 return SDValue();
4093 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4094 return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4097 // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4098 if (HandOpcode == ISD::BSWAP) {
4099 // If either operand has other uses, this transform is not an improvement.
4100 if (!N0.hasOneUse() || !N1.hasOneUse())
4101 return SDValue();
4102 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4103 return DAG.getNode(HandOpcode, DL, VT, Logic);
4106 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4107 // Only perform this optimization up until type legalization, before
4108 // LegalizeVectorOps. LegalizeVectorOps promotes vector operations by
4109 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4110 // we don't want to undo this promotion.
4111 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4112 // on scalars.
4113 if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4114 Level <= AfterLegalizeTypes) {
4115 // Input types must be integer and the same.
4116 if (XVT.isInteger() && XVT == Y.getValueType()) {
4117 SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4118 return DAG.getNode(HandOpcode, DL, VT, Logic);
4122 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4123 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4124 // If both shuffles use the same mask, and both shuffle within a single
4125 // vector, then it is worthwhile to move the swizzle after the operation.
4126 // The type-legalizer generates this pattern when loading illegal
4127 // vector types from memory. In many cases this allows additional shuffle
4128 // optimizations.
4129 // There are other cases where moving the shuffle after the xor/and/or
4130 // is profitable even if shuffles don't perform a swizzle.
4131 // If both shuffles use the same mask, and both shuffles have the same first
4132 // or second operand, then it might still be profitable to move the shuffle
4133 // after the xor/and/or operation.
4134 if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4135 auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4136 auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4137 assert(X.getValueType() == Y.getValueType() &&
4138 "Inputs to shuffles are not the same type");
4140 // Check that both shuffles use the same mask. The masks are known to be of
4141 // the same length because the result vector type is the same.
4142 // Check also that shuffles have only one use to avoid introducing extra
4143 // instructions.
4144 if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4145 !SVN0->getMask().equals(SVN1->getMask()))
4146 return SDValue();
4148 // Don't try to fold this node if it requires introducing a
4149 // build vector of all zeros that might be illegal at this stage.
4150 SDValue ShOp = N0.getOperand(1);
4151 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4152 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4154 // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4155 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4156 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4157 N0.getOperand(0), N1.getOperand(0));
4158 return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4161 // Don't try to fold this node if it requires introducing a
4162 // build vector of all zeros that might be illegal at this stage.
4163 ShOp = N0.getOperand(0);
4164 if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4165 ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4167 // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4168 if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4169 SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4170 N1.getOperand(1));
4171 return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4175 return SDValue();
4178 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4179 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4180 const SDLoc &DL) {
4181 SDValue LL, LR, RL, RR, N0CC, N1CC;
4182 if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4183 !isSetCCEquivalent(N1, RL, RR, N1CC))
4184 return SDValue();
4186 assert(N0.getValueType() == N1.getValueType() &&
4187 "Unexpected operand types for bitwise logic op");
4188 assert(LL.getValueType() == LR.getValueType() &&
4189 RL.getValueType() == RR.getValueType() &&
4190 "Unexpected operand types for setcc");
4192 // If we're here post-legalization or the logic op type is not i1, the logic
4193 // op type must match a setcc result type. Also, all folds require new
4194 // operations on the left and right operands, so those types must match.
4195 EVT VT = N0.getValueType();
4196 EVT OpVT = LL.getValueType();
4197 if (LegalOperations || VT.getScalarType() != MVT::i1)
4198 if (VT != getSetCCResultType(OpVT))
4199 return SDValue();
4200 if (OpVT != RL.getValueType())
4201 return SDValue();
4203 ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4204 ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4205 bool IsInteger = OpVT.isInteger();
4206 if (LR == RR && CC0 == CC1 && IsInteger) {
4207 bool IsZero = isNullOrNullSplat(LR);
4208 bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4210 // All bits clear?
4211 bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4212 // All sign bits clear?
4213 bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4214 // Any bits set?
4215 bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4216 // Any sign bits set?
4217 bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4219 // (and (seteq X, 0), (seteq Y, 0)) --> (seteq (or X, Y), 0)
4220 // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4221 // (or (setne X, 0), (setne Y, 0)) --> (setne (or X, Y), 0)
4222 // (or (setlt X, 0), (setlt Y, 0)) --> (setlt (or X, Y), 0)
4223 if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4224 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4225 AddToWorklist(Or.getNode());
4226 return DAG.getSetCC(DL, VT, Or, LR, CC1);
4229 // All bits set?
4230 bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4231 // All sign bits set?
4232 bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4233 // Any bits clear?
4234 bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4235 // Any sign bits clear?
4236 bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4238 // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4239 // (and (setlt X, 0), (setlt Y, 0)) --> (setlt (and X, Y), 0)
4240 // (or (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4241 // (or (setgt X, -1), (setgt Y, -1)) --> (setgt (and X, Y), -1)
4242 if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4243 SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4244 AddToWorklist(And.getNode());
4245 return DAG.getSetCC(DL, VT, And, LR, CC1);
4249 // TODO: What is the 'or' equivalent of this fold?
4250 // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4251 if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4252 IsInteger && CC0 == ISD::SETNE &&
4253 ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4254 (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4255 SDValue One = DAG.getConstant(1, DL, OpVT);
4256 SDValue Two = DAG.getConstant(2, DL, OpVT);
4257 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4258 AddToWorklist(Add.getNode());
4259 return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4262 // Try more general transforms if the predicates match and the only user of
4263 // the compares is the 'and' or 'or'.
4264 if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4265 N0.hasOneUse() && N1.hasOneUse()) {
4266 // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4267 // or (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4268 if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4269 SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4270 SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4271 SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4272 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4273 return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4276 // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4277 if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4278 // Match a shared variable operand and 2 non-opaque constant operands.
4279 ConstantSDNode *C0 = isConstOrConstSplat(LR);
4280 ConstantSDNode *C1 = isConstOrConstSplat(RR);
4281 if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4282 // Canonicalize larger constant as C0.
4283 if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4284 std::swap(C0, C1);
4286 // The difference of the constants must be a single bit.
4287 const APInt &C0Val = C0->getAPIntValue();
4288 const APInt &C1Val = C1->getAPIntValue();
4289 if ((C0Val - C1Val).isPowerOf2()) {
4290 // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4291 // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
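// e.g. (and (setne X, 4), (setne X, 5)) --> (setne (and (add X, -4), ~1), 0)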
4292 SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4293 SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4294 SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4295 SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4296 SDValue Zero = DAG.getConstant(0, DL, OpVT);
4297 return DAG.getSetCC(DL, VT, And, Zero, CC0);
4303 // Canonicalize equivalent operands to LL == RL.
4304 if (LL == RR && LR == RL) {
4305 CC1 = ISD::getSetCCSwappedOperands(CC1);
4306 std::swap(RL, RR);
4309 // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4310 // (or (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4311 if (LL == RL && LR == RR) {
4312 ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4313 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4314 if (NewCC != ISD::SETCC_INVALID &&
4315 (!LegalOperations ||
4316 (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4317 TLI.isOperationLegal(ISD::SETCC, OpVT))))
4318 return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4321 return SDValue();
4324 /// This contains all DAGCombine rules which reduce two values combined by
4325 /// an And operation to a single value. This makes them reusable in the context
4326 /// of visitSELECT(). Rules involving constants are not included as
4327 /// visitSELECT() already handles those cases.
4328 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4329 EVT VT = N1.getValueType();
4330 SDLoc DL(N);
4332 // fold (and x, undef) -> 0
4333 if (N0.isUndef() || N1.isUndef())
4334 return DAG.getConstant(0, DL, VT);
4336 if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4337 return V;
4339 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4340 VT.getSizeInBits() <= 64) {
4341 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4342 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4343 // Look for (and (add x, c1), (lshr y, c2)). If c1 isn't a legal
4344 // immediate for an add, but becomes legal once its top c2 bits are set
4345 // (the lshr operand masks those bits off anyway), transform the ADD so
4346 // the immediate doesn't need to be materialized in a register.
4347 APInt ADDC = ADDI->getAPIntValue();
4348 APInt SRLC = SRLI->getAPIntValue();
4349 if (ADDC.getMinSignedBits() <= 64 &&
4350 SRLC.ult(VT.getSizeInBits()) &&
4351 !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4352 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4353 SRLC.getZExtValue());
4354 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4355 ADDC |= Mask;
4356 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4357 SDLoc DL0(N0);
4358 SDValue NewAdd =
4359 DAG.getNode(ISD::ADD, DL0, VT,
4360 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4361 CombineTo(N0.getNode(), NewAdd);
4362 // Return N so it doesn't get rechecked!
4363 return SDValue(N, 0);
4371 // Reduce bit extract of low half of an integer to the narrower type.
4372 // (and (srl i64:x, K), KMask) ->
4373 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K), KMask))
4374 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4375 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4376 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4377 unsigned Size = VT.getSizeInBits();
4378 const APInt &AndMask = CAnd->getAPIntValue();
4379 unsigned ShiftBits = CShift->getZExtValue();
4381 // Bail out, this node will probably disappear anyway.
4382 if (ShiftBits == 0)
4383 return SDValue();
4385 unsigned MaskBits = AndMask.countTrailingOnes();
4386 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4388 if (AndMask.isMask() &&
4389 // Required bits must not span the two halves of the integer and
4390 // must fit in the half size type.
4391 (ShiftBits + MaskBits <= Size / 2) &&
4392 TLI.isNarrowingProfitable(VT, HalfVT) &&
4393 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4394 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4395 TLI.isTruncateFree(VT, HalfVT) &&
4396 TLI.isZExtFree(HalfVT, VT)) {
4397 // The isNarrowingProfitable check is to avoid regressions on PPC and
4398 // AArch64, which match a few 64-bit bit-insert / bit-extract patterns
4399 // on downstream users of this. Those patterns could probably be
4400 // extended to handle extensions mixed in.
4402 SDLoc SL(N0);
4403 assert(MaskBits <= Size);
4405 // Extracting the highest bit of the low half.
4406 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4407 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4408 N0.getOperand(0));
4410 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4411 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4412 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4413 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4414 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4420 return SDValue();
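// For example, (and (load i32 p), 255) can be implemented as a zero-extending
// i8 load of p when such a ZEXTLOAD is legal; this predicate answers that
// legality question.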
4423 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4424 EVT LoadResultTy, EVT &ExtVT) {
4425 if (!AndC->getAPIntValue().isMask())
4426 return false;
4428 unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4430 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4431 EVT LoadedVT = LoadN->getMemoryVT();
4433 if (ExtVT == LoadedVT &&
4434 (!LegalOperations ||
4435 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4436 // ZEXTLOAD will match without needing to change the size of the value being
4437 // loaded.
4438 return true;
4441 // Do not change the width of a volatile load.
4442 if (LoadN->isVolatile())
4443 return false;
4445 // Do not generate loads of non-round integer types since these can
4446 // be expensive (and would be wrong if the type is not byte sized).
4447 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4448 return false;
4450 if (LegalOperations &&
4451 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4452 return false;
4454 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4455 return false;
4457 return true;
4460 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4461 ISD::LoadExtType ExtType, EVT &MemVT,
4462 unsigned ShAmt) {
4463 if (!LDST)
4464 return false;
4465 // Only allow byte offsets.
4466 if (ShAmt % 8)
4467 return false;
4469 // Do not generate loads of non-round integer types since these can
4470 // be expensive (and would be wrong if the type is not byte sized).
4471 if (!MemVT.isRound())
4472 return false;
4474 // Don't change the width of a volatile load.
4475 if (LDST->isVolatile())
4476 return false;
4478 // Verify that we are actually reducing a load width here.
4479 if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4480 return false;
4482 // Ensure that this isn't going to produce an unsupported unaligned access.
4483 if (ShAmt &&
4484 !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4485 LDST->getAddressSpace(), ShAmt / 8))
4486 return false;
4488 // It's not possible to generate a constant of extended or untyped type.
4489 EVT PtrType = LDST->getBasePtr().getValueType();
4490 if (PtrType == MVT::Untyped || PtrType.isExtended())
4491 return false;
4493 if (isa<LoadSDNode>(LDST)) {
4494 LoadSDNode *Load = cast<LoadSDNode>(LDST);
4495 // Don't transform one with multiple uses, this would require adding a new
4496 // load.
4497 if (!SDValue(Load, 0).hasOneUse())
4498 return false;
4500 if (LegalOperations &&
4501 !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4502 return false;
4504 // For the transform to be legal, the load must produce only two values
4505 // (the value loaded and the chain). Don't transform a pre-increment
4506 // load, for example, which produces an extra value. Otherwise the
4507 // transformation is not equivalent, and the downstream logic to replace
4508 // uses gets things wrong.
4509 if (Load->getNumValues() > 2)
4510 return false;
4512 // If the load that we're shrinking is an extload and we're not just
4513 // discarding the extension we can't simply shrink the load. Bail.
4514 // TODO: It would be possible to merge the extensions in some cases.
4515 if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4516 Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4517 return false;
4519 if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4520 return false;
4521 } else {
4522 assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4523 StoreSDNode *Store = cast<StoreSDNode>(LDST);
4524 // Can't write outside the original store
4525 if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4526 return false;
4528 if (LegalOperations &&
4529 !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4530 return false;
4532 return true;
4535 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4536 SmallVectorImpl<LoadSDNode*> &Loads,
4537 SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4538 ConstantSDNode *Mask,
4539 SDNode *&NodeToMask) {
4540 // Recursively search the operands, looking for loads which can be
4541 // narrowed.
4542 for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
4543 SDValue Op = N->getOperand(i);
4545 if (Op.getValueType().isVector())
4546 return false;
4548 // Some constants may need fixing up later if they are too large.
4549 if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4550 if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4551 (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4552 NodesWithConsts.insert(N);
4553 continue;
4556 if (!Op.hasOneUse())
4557 return false;
4559 switch(Op.getOpcode()) {
4560 case ISD::LOAD: {
4561 auto *Load = cast<LoadSDNode>(Op);
4562 EVT ExtVT;
4563 if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4564 isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4566 // ZEXTLOAD is already small enough.
4567 if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4568 ExtVT.bitsGE(Load->getMemoryVT()))
4569 continue;
4571 // Use LE to convert equal sized loads to zext.
4572 if (ExtVT.bitsLE(Load->getMemoryVT()))
4573 Loads.push_back(Load);
4575 continue;
4577 return false;
4579 case ISD::ZERO_EXTEND:
4580 case ISD::AssertZext: {
4581 unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4582 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4583 EVT VT = Op.getOpcode() == ISD::AssertZext ?
4584 cast<VTSDNode>(Op.getOperand(1))->getVT() :
4585 Op.getOperand(0).getValueType();
4587 // We can accept extending nodes if the mask is wider than or equal in
4588 // width to the original type.
4589 if (ExtVT.bitsGE(VT))
4590 continue;
4591 break;
4593 case ISD::OR:
4594 case ISD::XOR:
4595 case ISD::AND:
4596 if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4597 NodeToMask))
4598 return false;
4599 continue;
4602 // Allow one node which will be masked along with any loads found.
4603 if (NodeToMask)
4604 return false;
4606 // Also ensure that the node to be masked only produces one data result.
4607 NodeToMask = Op.getNode();
4608 if (NodeToMask->getNumValues() > 1) {
4609 bool HasValue = false;
4610 for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4611 MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4612 if (VT != MVT::Glue && VT != MVT::Other) {
4613 if (HasValue) {
4614 NodeToMask = nullptr;
4615 return false;
4617 HasValue = true;
4620 assert(HasValue && "Node to be masked has no data result?");
4623 return true;
4626 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4627 auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4628 if (!Mask)
4629 return false;
4631 if (!Mask->getAPIntValue().isMask())
4632 return false;
4634 // No need to do anything if the and directly uses a load.
4635 if (isa<LoadSDNode>(N->getOperand(0)))
4636 return false;
4638 SmallVector<LoadSDNode*, 8> Loads;
4639 SmallPtrSet<SDNode*, 2> NodesWithConsts;
4640 SDNode *FixupNode = nullptr;
4641 if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4642 if (Loads.size() == 0)
4643 return false;
4645 LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4646 SDValue MaskOp = N->getOperand(1);
4648 // If it exists, fix up the single node we allow in the tree that needs
4649 // masking.
4650 if (FixupNode) {
4651 LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4652 SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4653 FixupNode->getValueType(0),
4654 SDValue(FixupNode, 0), MaskOp);
4655 DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4656 if (And.getOpcode() == ISD::AND)
4657 DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4660 // Narrow any constants that need it.
4661 for (auto *LogicN : NodesWithConsts) {
4662 SDValue Op0 = LogicN->getOperand(0);
4663 SDValue Op1 = LogicN->getOperand(1);
4665 if (isa<ConstantSDNode>(Op0))
4666 std::swap(Op0, Op1);
4668 SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4669 Op1, MaskOp);
4671 DAG.UpdateNodeOperands(LogicN, Op0, And);
4674 // Create narrow loads.
4675 for (auto *Load : Loads) {
4676 LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4677 SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4678 SDValue(Load, 0), MaskOp);
4679 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4680 if (And.getOpcode() == ISD::AND)
4681 And = SDValue(
4682 DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4683 SDValue NewLoad = ReduceLoadWidth(And.getNode());
4684 assert(NewLoad &&
4685 "Shouldn't be masking the load if it can't be narrowed");
4686 CombineTo(Load, NewLoad, NewLoad.getValue(1));
4688 DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4689 return true;
4691 return false;
4694 // Unfold
4695 // x & (-1 'logical shift' y)
4696 // To
4697 // (x 'opposite logical shift' y) 'logical shift' y
4698 // if it is better for performance.
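// For example, on i32 with a variable shift amount y (illustrative, assuming
// the target prefers the shift pair):
//   x & (-1 << y)  -->  (x >> y) << y   // clear the y lowest bits
//   x & (-1 >> y)  -->  (x << y) >> y   // clear the y highest bits
// Both forms avoid materializing the variable all-ones shift as a mask.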
4699 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4700 assert(N->getOpcode() == ISD::AND);
4702 SDValue N0 = N->getOperand(0);
4703 SDValue N1 = N->getOperand(1);
4705 // Do we actually prefer shifts over mask?
4706 if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4707 return SDValue();
4709 // Try to match (-1 '[outer] logical shift' y)
4710 unsigned OuterShift;
4711 unsigned InnerShift; // The opposite direction to the OuterShift.
4712 SDValue Y; // Shift amount.
4713 auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4714 if (!M.hasOneUse())
4715 return false;
4716 OuterShift = M->getOpcode();
4717 if (OuterShift == ISD::SHL)
4718 InnerShift = ISD::SRL;
4719 else if (OuterShift == ISD::SRL)
4720 InnerShift = ISD::SHL;
4721 else
4722 return false;
4723 if (!isAllOnesConstant(M->getOperand(0)))
4724 return false;
4725 Y = M->getOperand(1);
4726 return true;
4729 SDValue X;
4730 if (matchMask(N1))
4731 X = N0;
4732 else if (matchMask(N0))
4733 X = N1;
4734 else
4735 return SDValue();
4737 SDLoc DL(N);
4738 EVT VT = N->getValueType(0);
4740 // tmp = x 'opposite logical shift' y
4741 SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4742 // ret = tmp 'logical shift' y
4743 SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4745 return T1;
4748 SDValue DAGCombiner::visitAND(SDNode *N) {
4749 SDValue N0 = N->getOperand(0);
4750 SDValue N1 = N->getOperand(1);
4751 EVT VT = N1.getValueType();
4753 // x & x --> x
4754 if (N0 == N1)
4755 return N0;
4757 // fold vector ops
4758 if (VT.isVector()) {
4759 if (SDValue FoldedVOp = SimplifyVBinOp(N))
4760 return FoldedVOp;
4762 // fold (and x, 0) -> 0, vector edition
4763 if (ISD::isBuildVectorAllZeros(N0.getNode()))
4764 // do not return N0, because an undef node may exist in N0
4765 return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
4766 SDLoc(N), N0.getValueType());
4767 if (ISD::isBuildVectorAllZeros(N1.getNode()))
4768 // do not return N1, because an undef node may exist in N1
4769 return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
4770 SDLoc(N), N1.getValueType());
4772 // fold (and x, -1) -> x, vector edition
4773 if (ISD::isBuildVectorAllOnes(N0.getNode()))
4774 return N1;
4775 if (ISD::isBuildVectorAllOnes(N1.getNode()))
4776 return N0;
4779 // fold (and c1, c2) -> c1&c2
4780 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4781 ConstantSDNode *N1C = isConstOrConstSplat(N1);
4782 if (N0C && N1C && !N1C->isOpaque())
4783 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
4784 // canonicalize constant to RHS
4785 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4786 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4787 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
4788 // fold (and x, -1) -> x
4789 if (isAllOnesConstant(N1))
4790 return N0;
4791 // if (and x, c) is known to be zero, return 0
4792 unsigned BitWidth = VT.getScalarSizeInBits();
4793 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
4794 APInt::getAllOnesValue(BitWidth)))
4795 return DAG.getConstant(0, SDLoc(N), VT);
4797 if (SDValue NewSel = foldBinOpIntoSelect(N))
4798 return NewSel;
4800 // reassociate and
4801 if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
4802 return RAND;
4804 // Try to convert a constant mask AND into a shuffle clear mask.
4805 if (VT.isVector())
4806 if (SDValue Shuffle = XformToShuffleWithZero(N))
4807 return Shuffle;
4809 // fold (and (or x, C), D) -> D if (C & D) == D
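// e.g. (and (or x, 0xFF), 0x0F) --> 0x0F: the OR forces every bit of the
// 0x0F mask to be set, so the AND result no longer depends on x.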
4810 auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
4811 return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
4813 if (N0.getOpcode() == ISD::OR &&
4814 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
4815 return N1;
4816 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
4817 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
4818 SDValue N0Op0 = N0.getOperand(0);
4819 APInt Mask = ~N1C->getAPIntValue();
4820 Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
4821 if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
4822 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
4823 N0.getValueType(), N0Op0);
4825 // Replace uses of the AND with uses of the Zero extend node.
4826 CombineTo(N, Zext);
4828 // We actually want to replace all uses of the any_extend with the
4829 // zero_extend, to avoid duplicating things. This will later cause this
4830 // AND to be folded.
4831 CombineTo(N0.getNode(), Zext);
4832 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4835 // Similarly, fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
4836 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
4837 // already be zero by virtue of the width of the base type of the load.
4839 // The 'X' node here can either be nothing or an extract_vector_elt to catch
4840 // more cases.
4841 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
4842 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
4843 N0.getOperand(0).getOpcode() == ISD::LOAD &&
4844 N0.getOperand(0).getResNo() == 0) ||
4845 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
4846 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
4847 N0 : N0.getOperand(0) );
4849 // Get the constant (if applicable) the zero'th operand is being ANDed with.
4850 // This can be a pure constant or a vector splat, in which case we treat the
4851 // vector as a scalar and use the splat value.
4852 APInt Constant = APInt::getNullValue(1);
4853 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
4854 Constant = C->getAPIntValue();
4855 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
4856 APInt SplatValue, SplatUndef;
4857 unsigned SplatBitSize;
4858 bool HasAnyUndefs;
4859 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
4860 SplatBitSize, HasAnyUndefs);
4861 if (IsSplat) {
4862 // Undef bits can contribute to a possible optimisation if set, so
4863 // set them.
4864 SplatValue |= SplatUndef;
4866 // The splat value may be something like "0x00FFFFFF", which means 0 for
4867 // the first vector value and FF for the rest, repeating. We need a mask
4868 // that will apply equally to all members of the vector, so AND all the
4869 // lanes of the constant together.
4870 EVT VT = Vector->getValueType(0);
4871 unsigned BitWidth = VT.getScalarSizeInBits();
4873 // If the splat value has been compressed to a bitlength lower
4874 // than the size of the vector lane, we need to re-expand it to
4875 // the lane size.
4876 if (BitWidth > SplatBitSize)
4877 for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
4878 SplatBitSize < BitWidth;
4879 SplatBitSize = SplatBitSize * 2)
4880 SplatValue |= SplatValue.shl(SplatBitSize);
4882 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
4883 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
4884 if (SplatBitSize % BitWidth == 0) {
4885 Constant = APInt::getAllOnesValue(BitWidth);
4886 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
4887 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
4892 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
4893 // actually legal and isn't going to get expanded, else this is a false
4894 // optimisation.
4895 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
4896 Load->getValueType(0),
4897 Load->getMemoryVT());
4899 // Resize the constant to the same size as the original memory access before
4900 // extension. If it is still the AllOnesValue then this AND is completely
4901 // unneeded.
4902 Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
4904 bool B;
4905 switch (Load->getExtensionType()) {
4906 default: B = false; break;
4907 case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
4908 case ISD::ZEXTLOAD:
4909 case ISD::NON_EXTLOAD: B = true; break;
4912 if (B && Constant.isAllOnesValue()) {
4913 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
4914 // preserve semantics once we get rid of the AND.
4915 SDValue NewLoad(Load, 0);
4917 // Fold the AND away. NewLoad may get replaced immediately.
4918 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
4920 if (Load->getExtensionType() == ISD::EXTLOAD) {
4921 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
4922 Load->getValueType(0), SDLoc(Load),
4923 Load->getChain(), Load->getBasePtr(),
4924 Load->getOffset(), Load->getMemoryVT(),
4925 Load->getMemOperand());
4926 // Replace uses of the EXTLOAD with the new ZEXTLOAD.
4927 if (Load->getNumValues() == 3) {
4928 // PRE/POST_INC loads have 3 values.
4929 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
4930 NewLoad.getValue(2) };
4931 CombineTo(Load, To, 3, true);
4932 } else {
4933 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
4937 return SDValue(N, 0); // Return N so it doesn't get rechecked!
4941 // fold (and (load x), 255) -> (zextload x, i8)
4942 // fold (and (extload x, i16), 255) -> (zextload x, i8)
4943 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
4944 if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
4945 (N0.getOpcode() == ISD::ANY_EXTEND &&
4946 N0.getOperand(0).getOpcode() == ISD::LOAD))) {
4947 if (SDValue Res = ReduceLoadWidth(N)) {
4948 LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
4949 ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
4950 AddToWorklist(N);
4951 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
4952 return SDValue(N, 0);
4956 if (Level >= AfterLegalizeTypes) {
4957 // Attempt to propagate the AND back up to the leaves which, if they're
4958 // loads, can be combined to narrow loads and the AND node can be removed.
4959 // Perform after legalization so that extend nodes will already be
4960 // combined into the loads.
4961 if (BackwardsPropagateMask(N, DAG)) {
4962 return SDValue(N, 0);
4966 if (SDValue Combined = visitANDLike(N0, N1, N))
4967 return Combined;
4969 // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
4970 if (N0.getOpcode() == N1.getOpcode())
4971 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
4972 return V;
4974 // Masking the negated extension of a boolean is just the zero-extended
4975 // boolean:
4976 // and (sub 0, zext(bool X)), 1 --> zext(bool X)
4977 // and (sub 0, sext(bool X)), 1 --> zext(bool X)
4979 // Note: the SimplifyDemandedBits fold below can make an information-losing
4980 // transform, and then we have no way to find this better fold.
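// A concrete check of the fold: if X is true, zext(X) = 1 and (sub 0, 1) is
// all-ones, so (and all-ones, 1) = 1 = zext(X); if X is false, every step
// yields 0. The sext variant differs only in that (sub 0, sext(true)) =
// (sub 0, -1) = 1 before the masking.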
4981 if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
4982 if (isNullOrNullSplat(N0.getOperand(0))) {
4983 SDValue SubRHS = N0.getOperand(1);
4984 if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
4985 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4986 return SubRHS;
4987 if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
4988 SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
4989 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
4993 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
4994 // fold (and (sra)) -> (and (srl)) when possible.
4995 if (SimplifyDemandedBits(SDValue(N, 0)))
4996 return SDValue(N, 0);
4998 // fold (zext_inreg (extload x)) -> (zextload x)
4999 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
5000 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5001 EVT MemVT = LN0->getMemoryVT();
5002 // If we zero all the possible extended bits, then we can turn this into
5003 // a zextload if we are running before legalize or the operation is legal.
5004 unsigned BitWidth = N1.getScalarValueSizeInBits();
5005 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5006 BitWidth - MemVT.getScalarSizeInBits())) &&
5007 ((!LegalOperations && !LN0->isVolatile()) ||
5008 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5009 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5010 LN0->getChain(), LN0->getBasePtr(),
5011 MemVT, LN0->getMemOperand());
5012 AddToWorklist(N);
5013 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5014 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5017 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5018 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
5019 N0.hasOneUse()) {
5020 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5021 EVT MemVT = LN0->getMemoryVT();
5022 // If we zero all the possible extended bits, then we can turn this into
5023 // a zextload if we are running before legalize or the operation is legal.
5024 unsigned BitWidth = N1.getScalarValueSizeInBits();
5025 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
5026 BitWidth - MemVT.getScalarSizeInBits())) &&
5027 ((!LegalOperations && !LN0->isVolatile()) ||
5028 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5029 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
5030 LN0->getChain(), LN0->getBasePtr(),
5031 MemVT, LN0->getMemOperand());
5032 AddToWorklist(N);
5033 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5034 return SDValue(N, 0); // Return N so it doesn't get rechecked!
5037 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5038 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5039 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5040 N0.getOperand(1), false))
5041 return BSwap;
5044 if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5045 return Shifts;
5047 return SDValue();
5050 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
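/// For example, on i32 (illustrative values):
///   ((a & 0xFF00) >> 8) | ((a & 0xFF) << 8)
/// with a = 0xAABBCCDD yields 0x0000DDCC, which equals
/// (bswap 0xAABBCCDD) >> 16 = 0xDDCCBBAA >> 16.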
5051 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5052 bool DemandHighBits) {
5053 if (!LegalOperations)
5054 return SDValue();
5056 EVT VT = N->getValueType(0);
5057 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5058 return SDValue();
5059 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5060 return SDValue();
5062 // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5063 bool LookPassAnd0 = false;
5064 bool LookPassAnd1 = false;
5065 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5066 std::swap(N0, N1);
5067 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5068 std::swap(N0, N1);
5069 if (N0.getOpcode() == ISD::AND) {
5070 if (!N0.getNode()->hasOneUse())
5071 return SDValue();
5072 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5073 // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5074 // This is needed for X86.
5075 if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5076 N01C->getZExtValue() != 0xFFFF))
5077 return SDValue();
5078 N0 = N0.getOperand(0);
5079 LookPassAnd0 = true;
5082 if (N1.getOpcode() == ISD::AND) {
5083 if (!N1.getNode()->hasOneUse())
5084 return SDValue();
5085 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5086 if (!N11C || N11C->getZExtValue() != 0xFF)
5087 return SDValue();
5088 N1 = N1.getOperand(0);
5089 LookPassAnd1 = true;
5092 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5093 std::swap(N0, N1);
5094 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5095 return SDValue();
5096 if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5097 return SDValue();
5099 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5100 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5101 if (!N01C || !N11C)
5102 return SDValue();
5103 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5104 return SDValue();
5106 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5107 SDValue N00 = N0->getOperand(0);
5108 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5109 if (!N00.getNode()->hasOneUse())
5110 return SDValue();
5111 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5112 if (!N001C || N001C->getZExtValue() != 0xFF)
5113 return SDValue();
5114 N00 = N00.getOperand(0);
5115 LookPassAnd0 = true;
5118 SDValue N10 = N1->getOperand(0);
5119 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5120 if (!N10.getNode()->hasOneUse())
5121 return SDValue();
5122 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5123 // Also allow 0xFFFF since the bits will be shifted out. This is needed
5124 // for X86.
5125 if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5126 N101C->getZExtValue() != 0xFFFF))
5127 return SDValue();
5128 N10 = N10.getOperand(0);
5129 LookPassAnd1 = true;
5132 if (N00 != N10)
5133 return SDValue();
5135 // Make sure everything beyond the low halfword gets set to zero since the SRL
5136 // 16 will clear the top bits.
5137 unsigned OpSizeInBits = VT.getSizeInBits();
5138 if (DemandHighBits && OpSizeInBits > 16) {
5139 // If the left-shift isn't masked out then the only way this is a bswap is
5140 // if all bits beyond the low 8 are 0. In that case the entire pattern
5141 // reduces to a left shift anyway: leave it for other parts of the combiner.
5142 if (!LookPassAnd0)
5143 return SDValue();
5145 // However, if the right shift isn't masked out then it might be because
5146 // it's not needed. See if we can spot that too.
5147 if (!LookPassAnd1 &&
5148 !DAG.MaskedValueIsZero(
5149 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5150 return SDValue();
5153 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5154 if (OpSizeInBits > 16) {
5155 SDLoc DL(N);
5156 Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5157 DAG.getConstant(OpSizeInBits - 16, DL,
5158 getShiftAmountTy(VT)));
5160 return Res;
5163 /// Return true if the specified node is an element that makes up a 32-bit
5164 /// packed halfword byteswap.
5165 /// ((x & 0x000000ff) << 8) |
5166 /// ((x & 0x0000ff00) >> 8) |
5167 /// ((x & 0x00ff0000) << 8) |
5168 /// ((x & 0xff000000) >> 8)
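/// Each element records the node that sources its byte in Parts, indexed by
/// the byte position its mask selects: e.g. the ((x & 0x0000ff00) >> 8)
/// element fills Parts[1]. Matching fails if two elements claim the same
/// byte.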
5169 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5170 if (!N.getNode()->hasOneUse())
5171 return false;
5173 unsigned Opc = N.getOpcode();
5174 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5175 return false;
5177 SDValue N0 = N.getOperand(0);
5178 unsigned Opc0 = N0.getOpcode();
5179 if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5180 return false;
5182 ConstantSDNode *N1C = nullptr;
5183 // SHL or SRL: look upstream for AND mask operand
5184 if (Opc == ISD::AND)
5185 N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5186 else if (Opc0 == ISD::AND)
5187 N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5188 if (!N1C)
5189 return false;
5191 unsigned MaskByteOffset;
5192 switch (N1C->getZExtValue()) {
5193 default:
5194 return false;
5195 case 0xFF: MaskByteOffset = 0; break;
5196 case 0xFF00: MaskByteOffset = 1; break;
5197 case 0xFFFF:
5198 // In case demanded bits didn't clear the bits that will be shifted out.
5199 // This is needed for X86.
5200 if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5201 MaskByteOffset = 1;
5202 break;
5204 return false;
5205 case 0xFF0000: MaskByteOffset = 2; break;
5206 case 0xFF000000: MaskByteOffset = 3; break;
5209 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5210 if (Opc == ISD::AND) {
5211 if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5212 // (x >> 8) & 0xff
5213 // (x >> 8) & 0xff0000
5214 if (Opc0 != ISD::SRL)
5215 return false;
5216 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5217 if (!C || C->getZExtValue() != 8)
5218 return false;
5219 } else {
5220 // (x << 8) & 0xff00
5221 // (x << 8) & 0xff000000
5222 if (Opc0 != ISD::SHL)
5223 return false;
5224 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5225 if (!C || C->getZExtValue() != 8)
5226 return false;
5228 } else if (Opc == ISD::SHL) {
5229 // (x & 0xff) << 8
5230 // (x & 0xff0000) << 8
5231 if (MaskByteOffset != 0 && MaskByteOffset != 2)
5232 return false;
5233 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5234 if (!C || C->getZExtValue() != 8)
5235 return false;
5236 } else { // Opc == ISD::SRL
5237 // (x & 0xff00) >> 8
5238 // (x & 0xff000000) >> 8
5239 if (MaskByteOffset != 1 && MaskByteOffset != 3)
5240 return false;
5241 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5242 if (!C || C->getZExtValue() != 8)
5243 return false;
5246 if (Parts[MaskByteOffset])
5247 return false;
5249 Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5250 return true;
5253 /// Match a 32-bit packed halfword bswap. That is
5254 /// ((x & 0x000000ff) << 8) |
5255 /// ((x & 0x0000ff00) >> 8) |
5256 /// ((x & 0x00ff0000) << 8) |
5257 /// ((x & 0xff000000) >> 8)
5258 /// => (rotl (bswap x), 16)
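/// For example, with x = 0xAABBCCDD each of the four elements contributes
/// one byte: 0xBB000000 | 0x00AA0000 | 0x0000DD00 | 0x000000CC = 0xBBAADDCC,
/// and indeed (rotl (bswap 0xAABBCCDD), 16) = (rotl 0xDDCCBBAA, 16) =
/// 0xBBAADDCC.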
5259 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5260 if (!LegalOperations)
5261 return SDValue();
5263 EVT VT = N->getValueType(0);
5264 if (VT != MVT::i32)
5265 return SDValue();
5266 if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5267 return SDValue();
5269 // Look for either
5270 // (or (or (and), (and)), (or (and), (and)))
5271 // (or (or (or (and), (and)), (and)), (and))
5272 if (N0.getOpcode() != ISD::OR)
5273 return SDValue();
5274 SDValue N00 = N0.getOperand(0);
5275 SDValue N01 = N0.getOperand(1);
5276 SDNode *Parts[4] = {};
5278 if (N1.getOpcode() == ISD::OR &&
5279 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5280 // (or (or (and), (and)), (or (and), (and)))
5281 if (!isBSwapHWordElement(N00, Parts))
5282 return SDValue();
5284 if (!isBSwapHWordElement(N01, Parts))
5285 return SDValue();
5286 SDValue N10 = N1.getOperand(0);
5287 if (!isBSwapHWordElement(N10, Parts))
5288 return SDValue();
5289 SDValue N11 = N1.getOperand(1);
5290 if (!isBSwapHWordElement(N11, Parts))
5291 return SDValue();
5292 } else {
5293 // (or (or (or (and), (and)), (and)), (and))
5294 if (!isBSwapHWordElement(N1, Parts))
5295 return SDValue();
5296 if (!isBSwapHWordElement(N01, Parts))
5297 return SDValue();
5298 if (N00.getOpcode() != ISD::OR)
5299 return SDValue();
5300 SDValue N000 = N00.getOperand(0);
5301 if (!isBSwapHWordElement(N000, Parts))
5302 return SDValue();
5303 SDValue N001 = N00.getOperand(1);
5304 if (!isBSwapHWordElement(N001, Parts))
5305 return SDValue();
5308 // Make sure the parts are all coming from the same node.
5309 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5310 return SDValue();
5312 SDLoc DL(N);
5313 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5314 SDValue(Parts[0], 0));
5316 // Result of the bswap should be rotated by 16. If it's not legal, then
5317 // do (x << 16) | (x >> 16).
5318 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5319 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5320 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5321 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5322 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5323 return DAG.getNode(ISD::OR, DL, VT,
5324 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5325 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5328 /// This contains all DAGCombine rules which reduce two values combined by
5329 /// an Or operation to a single value \see visitANDLike().
5330 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5331 EVT VT = N1.getValueType();
5332 SDLoc DL(N);
5334 // fold (or x, undef) -> -1
5335 if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5336 return DAG.getAllOnesConstant(DL, VT);
5338 if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5339 return V;
5341 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
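// e.g. if X is known to have only low-byte bits set and Y only bits in
// 0xFF00 (hypothetical masks):
//   (or (and X, 0xFF), (and Y, 0xFF00)) --> (and (or X, Y), 0xFFFF)
// trading two ANDs for one.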
5342 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5343 // Don't increase # computations.
5344 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5345 // We can only do this xform if we know that bits from X that are set in C2
5346 // but not in C1 are already zero. Likewise for Y.
5347 if (const ConstantSDNode *N0O1C =
5348 getAsNonOpaqueConstant(N0.getOperand(1))) {
5349 if (const ConstantSDNode *N1O1C =
5350 getAsNonOpaqueConstant(N1.getOperand(1))) {
5351 // We can only do this xform if we know that bits from X that are set in
5352 // C2 but not in C1 are already zero. Likewise for Y.
5353 const APInt &LHSMask = N0O1C->getAPIntValue();
5354 const APInt &RHSMask = N1O1C->getAPIntValue();
5356 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5357 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5358 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5359 N0.getOperand(0), N1.getOperand(0));
5360 return DAG.getNode(ISD::AND, DL, VT, X,
5361 DAG.getConstant(LHSMask | RHSMask, DL, VT));
5367 // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5368 if (N0.getOpcode() == ISD::AND &&
5369 N1.getOpcode() == ISD::AND &&
5370 N0.getOperand(0) == N1.getOperand(0) &&
5371 // Don't increase # computations.
5372 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5373 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5374 N0.getOperand(1), N1.getOperand(1));
5375 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5378 return SDValue();
5381 /// OR combines for which the commuted variant will be tried as well.
5382 static SDValue visitORCommutative(
5383 SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5384 EVT VT = N0.getValueType();
5385 if (N0.getOpcode() == ISD::AND) {
5386 // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5387 if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5388 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5390 // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5391 if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5392 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5395 return SDValue();
5398 SDValue DAGCombiner::visitOR(SDNode *N) {
5399 SDValue N0 = N->getOperand(0);
5400 SDValue N1 = N->getOperand(1);
5401 EVT VT = N1.getValueType();
5403 // x | x --> x
5404 if (N0 == N1)
5405 return N0;
5407 // fold vector ops
5408 if (VT.isVector()) {
5409 if (SDValue FoldedVOp = SimplifyVBinOp(N))
5410 return FoldedVOp;
5412 // fold (or x, 0) -> x, vector edition
5413 if (ISD::isBuildVectorAllZeros(N0.getNode()))
5414 return N1;
5415 if (ISD::isBuildVectorAllZeros(N1.getNode()))
5416 return N0;
5418 // fold (or x, -1) -> -1, vector edition
5419 if (ISD::isBuildVectorAllOnes(N0.getNode()))
5420 // do not return N0, because an undef node may exist in N0
5421 return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5422 if (ISD::isBuildVectorAllOnes(N1.getNode()))
5423 // do not return N1, because an undef node may exist in N1
5424 return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5426 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5427 // Do this only if the resulting shuffle is legal.
5428 if (isa<ShuffleVectorSDNode>(N0) &&
5429 isa<ShuffleVectorSDNode>(N1) &&
5430 // Avoid folding a node with illegal type.
5431 TLI.isTypeLegal(VT)) {
5432 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5433 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5434 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5435 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5436 // Ensure both shuffles have a zero input.
5437 if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5438 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5439 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5440 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5441 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5442 bool CanFold = true;
5443 int NumElts = VT.getVectorNumElements();
5444 SmallVector<int, 4> Mask(NumElts);
5446 for (int i = 0; i != NumElts; ++i) {
5447 int M0 = SV0->getMaskElt(i);
5448 int M1 = SV1->getMaskElt(i);
5450 // Determine if either index is pointing to a zero vector.
5451 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5452 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5454 // If one element is zero and the other side is undef, keep undef.
5455 // This also handles the case that both are undef.
5456 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5457 Mask[i] = -1;
5458 continue;
5461 // Make sure only one of the elements is zero.
5462 if (M0Zero == M1Zero) {
5463 CanFold = false;
5464 break;
5467 assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5469 // We have a zero and non-zero element. If the non-zero came from
5470 // SV0 make the index a LHS index. If it came from SV1, make it
5471 // a RHS index. We need to mod by NumElts because we don't care
5472 // which operand it came from in the original shuffles.
5473 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5476 if (CanFold) {
5477 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5478 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5480 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5481 if (!LegalMask) {
5482 std::swap(NewLHS, NewRHS);
5483 ShuffleVectorSDNode::commuteMask(Mask);
5484 LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
5487 if (LegalMask)
5488 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
5494 // fold (or c1, c2) -> c1|c2
5495 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5496 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5497 if (N0C && N1C && !N1C->isOpaque())
5498 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5499 // canonicalize constant to RHS
5500 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5501 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5502 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5503 // fold (or x, 0) -> x
5504 if (isNullConstant(N1))
5505 return N0;
5506 // fold (or x, -1) -> -1
5507 if (isAllOnesConstant(N1))
5508 return N1;
5510 if (SDValue NewSel = foldBinOpIntoSelect(N))
5511 return NewSel;
5513 // fold (or x, c) -> c iff (x & ~c) == 0
5514 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5515 return N1;
5517 if (SDValue Combined = visitORLike(N0, N1, N))
5518 return Combined;
5520 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
5521 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
5522 return BSwap;
5523 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
5524 return BSwap;
5526 // reassociate or
5527 if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
5528 return ROR;
5530 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
5531 // iff (c1 & c2) != 0 or c1/c2 are undef.
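// e.g. (or (and X, 0x0F), 0x01) --> (and (or X, 0x01), 0x0F), since
// 0x0F | 0x01 == 0x0F; the identity (x & c1) | c2 == (x | c2) & (c1 | c2)
// holds bitwise for any constants.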
5532 auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
5533 return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
5535 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
5536 ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
5537 if (SDValue COR = DAG.FoldConstantArithmetic(
5538 ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
5539 SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
5540 AddToWorklist(IOR.getNode());
5541 return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
5545 if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
5546 return Combined;
5547 if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
5548 return Combined;
5550 // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
5551 if (N0.getOpcode() == N1.getOpcode())
5552 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5553 return V;
5555 // See if this is some rotate idiom.
5556 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
5557 return SDValue(Rot, 0);
5559 if (SDValue Load = MatchLoadCombine(N))
5560 return Load;
5562 // Simplify the operands using demanded-bits information.
5563 if (SimplifyDemandedBits(SDValue(N, 0)))
5564 return SDValue(N, 0);
5566 // If OR can be rewritten into ADD, try combines based on ADD.
5567 if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
5568 DAG.haveNoCommonBitsSet(N0, N1))
5569 if (SDValue Combined = visitADDLike(N))
5570 return Combined;
5572 return SDValue();
5575 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5576 if (Op.getOpcode() == ISD::AND &&
5577 DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5578 Mask = Op.getOperand(1);
5579 return Op.getOperand(0);
5581 return Op;
5584 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5585 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5586 SDValue &Mask) {
5587 Op = stripConstantMask(DAG, Op, Mask);
5588 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5589 Shift = Op;
5590 return true;
5592 return false;
5595 /// Helper function for visitOR to extract the needed side of a rotate idiom
5596 /// from a shl/srl/mul/udiv. This is meant to handle cases where
5597 /// InstCombine merged some outside op with one of the shifts from
5598 /// the rotate pattern.
5599 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5600 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5601 /// patterns:
5603 /// (or (mul v c0) (srl (mul v c1) c2)):
5604 /// expands (mul v c0) -> (shl (mul v c1) c3)
5606 /// (or (udiv v c0) (shl (udiv v c1) c2)):
5607 /// expands (udiv v c0) -> (srl (udiv v c1) c3)
5609 /// (or (shl v c0) (srl (shl v c1) c2)):
5610 /// expands (shl v c0) -> (shl (shl v c1) c3)
5612 /// (or (srl v c0) (shl (srl v c1) c2)):
5613 /// expands (srl v c0) -> (srl (srl v c1) c3)
5615 /// Such that in all cases, c3+c2==bitwidth(op v c1).
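/// For example, on i32 (hypothetical constants):
///   (or (mul v 32) (srl (mul v 2) 28))
/// has c0 = 32, c1 = 2, c2 = 28, so c3 = 32 - 28 = 4, and 32 == 2 << 4;
/// (mul v 32) is rewritten to (shl (mul v 2) 4), exposing the rotate
/// (rotl (mul v 2), 4).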
5616 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5617 SDValue ExtractFrom, SDValue &Mask,
5618 const SDLoc &DL) {
5619 assert(OppShift && ExtractFrom && "Empty SDValue");
5620 assert(
5621 (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5622 "Existing shift must be valid as a rotate half");
5624 ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5625 // Preconditions:
5626 // (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5628 // Find opcode of the needed shift to be extracted from (op0 v c0).
5629 unsigned Opcode = ISD::DELETED_NODE;
5630 bool IsMulOrDiv = false;
5631 // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5632 // opcode or its arithmetic (mul or udiv) variant.
5633 auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5634 IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5635 if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5636 return false;
5637 Opcode = NeededShift;
5638 return true;
5640 // op0 must be either the needed shift opcode or the mul/udiv equivalent
5641 // that the needed shift can be extracted from.
5642 if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5643 (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5644 return SDValue();
5646 // op0 must be the same opcode on both sides, have the same LHS argument,
5647 // and produce the same value type.
5648 SDValue OppShiftLHS = OppShift.getOperand(0);
5649 EVT ShiftedVT = OppShiftLHS.getValueType();
5650 if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5651 OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5652 ShiftedVT != ExtractFrom.getValueType())
5653 return SDValue();
5655 // Amount of the existing shift.
5656 ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5657 // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5658 ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5659 // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5660 ConstantSDNode *ExtractFromCst =
5661 isConstOrConstSplat(ExtractFrom.getOperand(1));
5662 // TODO: We should be able to handle non-uniform constant vectors for these values
5663 // Check that we have constant values.
5664 if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5665 !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5666 !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5667 return SDValue();
5669 // Compute the shift amount we need to extract to complete the rotate.
5670 const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5671 if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5672 return SDValue();
5673 APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5674 // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5675 APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5676 APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5677 zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5679 // Now try extract the needed shift from the ExtractFrom op and see if the
5680 // result matches up with the existing shift's LHS op.
5681 if (IsMulOrDiv) {
5682 // Op to extract from is a mul or udiv by a constant.
5683 // Check:
5684 // c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5685 // c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5686 const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5687 NeededShiftAmt.getZExtValue());
5688 APInt ResultAmt;
5689 APInt Rem;
5690 APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5691 if (Rem != 0 || ResultAmt != OppLHSAmt)
5692 return SDValue();
5693 } else {
5694 // Op to extract from is a shift by a constant.
5695 // Check:
5696 // c2 - (bitwidth(op0 v c0) - c1) == c0
5697 if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5698 ExtractFromAmt.getBitWidth()))
5699 return SDValue();
5702 // Return the expanded shift op that should allow a rotate to be formed.
5703 EVT ShiftVT = OppShift.getOperand(1).getValueType();
5704 EVT ResVT = ExtractFrom.getValueType();
5705 SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5706 return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5709 // Return true if we can prove that, whenever Neg and Pos are both in the
5710 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
5711 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5713 // (or (shift1 X, Neg), (shift2 X, Pos))
5715 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
5716 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
5717 // to consider shift amounts with defined behavior.
5718 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
5719 SelectionDAG &DAG) {
5720 // If EltSize is a power of 2 then:
5722 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
5723 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
5725 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
5726 // for the stronger condition:
5728 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A]
5730 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
5731 // we can just replace Neg with Neg' for the rest of the function.
5733 // In other cases we check for the even stronger condition:
5735 // Neg == EltSize - Pos [B]
5737 // for all Neg and Pos. Note that the (or ...) then invokes undefined
5738 // behavior if Pos == 0 (and consequently Neg == EltSize).
5740 // We could actually use [A] whenever EltSize is a power of 2, but the
5741 // only extra cases that it would match are those uninteresting ones
5742 // where Neg and Pos are never in range at the same time. E.g. for
5743 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
5744 // as well as (sub 32, Pos), but:
5746 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
5748 // always invokes undefined behavior for 32-bit X.
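// For example, with EltSize == 32, Neg == (and (sub 32, Pos), 31)
// satisfies [A], since rotating one way by Pos and the other way by
// (32 - Pos) & 31 selects the same result for every Pos in [0, 32).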
5750 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
5751 unsigned MaskLoBits = 0;
5752 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
5753 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
5754 KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
5755 unsigned Bits = Log2_64(EltSize);
5756 if (NegC->getAPIntValue().getActiveBits() <= Bits &&
5757 ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
5758 Neg = Neg.getOperand(0);
5759 MaskLoBits = Bits;
5764 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
5765 if (Neg.getOpcode() != ISD::SUB)
5766 return false;
5767 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
5768 if (!NegC)
5769 return false;
5770 SDValue NegOp1 = Neg.getOperand(1);
5772 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
5773 // Pos'. The truncation is redundant for the purpose of the equality.
5774 if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
5775 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
5776 KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
5777 if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
5778 ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
5779 MaskLoBits))
5780 Pos = Pos.getOperand(0);
5784 // The condition we need is now:
5786 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
5788 // If NegOp1 == Pos then we need:
5790 // EltSize & Mask == NegC & Mask
5792 // (because "x & Mask" is a truncation and distributes through subtraction).
5793 APInt Width;
5794 if (Pos == NegOp1)
5795 Width = NegC->getAPIntValue();
5797 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
5798 // Then the condition we want to prove becomes:
5800 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
5802 // which, again because "x & Mask" is a truncation, becomes:
5804 // NegC & Mask == (EltSize - PosC) & Mask
5805 // EltSize & Mask == (NegC + PosC) & Mask
5806 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
5807 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
5808 Width = PosC->getAPIntValue() + NegC->getAPIntValue();
5809 else
5810 return false;
5811 } else
5812 return false;
5814 // Now we just need to check that EltSize & Mask == Width & Mask.
5815 if (MaskLoBits)
5816 // EltSize & Mask is 0 since Mask is EltSize - 1.
5817 return Width.getLoBits(MaskLoBits) == 0;
5818 return Width == EltSize;
5821 // A subroutine of MatchRotate used once we have found an OR of two opposite
5822 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces
5823 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
5824 // former being preferred if supported. InnerPos and InnerNeg are Pos and
5825 // Neg with outer conversions stripped away.
5826 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
5827 SDValue Neg, SDValue InnerPos,
5828 SDValue InnerNeg, unsigned PosOpcode,
5829 unsigned NegOpcode, const SDLoc &DL) {
5830 // fold (or (shl x, (*ext y)),
5831 // (srl x, (*ext (sub 32, y)))) ->
5832 // (rotl x, y) or (rotr x, (sub 32, y))
5834 // fold (or (shl x, (*ext (sub 32, y))),
5835 // (srl x, (*ext y))) ->
5836 // (rotr x, y) or (rotl x, (sub 32, y))
5837 EVT VT = Shifted.getValueType();
5838 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
5839 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
5840 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
5841 HasPos ? Pos : Neg).getNode();
5844 return nullptr;
5847 // MatchRotate - Handle an 'or' of two operands. If this is one of the many
5848 // idioms for rotate, and if the target supports rotation instructions, generate
5849 // a rot[lr].
5850 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
5851 // Must be a legal type. Expanded 'n promoted things won't work with rotates.
5852 EVT VT = LHS.getValueType();
5853 if (!TLI.isTypeLegal(VT)) return nullptr;
5855 // The target must have at least one rotate flavor.
5856 bool HasROTL = hasOperation(ISD::ROTL, VT);
5857 bool HasROTR = hasOperation(ISD::ROTR, VT);
5858 if (!HasROTL && !HasROTR) return nullptr;
5860 // Check for truncated rotate.
5861 if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
5862 LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
5863 assert(LHS.getValueType() == RHS.getValueType());
5864 if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
5865 return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
5866 SDValue(Rot, 0)).getNode();
5870 // Match "(X shl/srl V1) & V2" where V2 may not be present.
5871 SDValue LHSShift; // The shift.
5872 SDValue LHSMask; // AND value if any.
5873 matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
5875 SDValue RHSShift; // The shift.
5876 SDValue RHSMask; // AND value if any.
5877 matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
5879 // If neither side matched a rotate half, bail
5880 if (!LHSShift && !RHSShift)
5881 return nullptr;
5883 // InstCombine may have combined a constant shl, srl, mul, or udiv with one
5884 // side of the rotate, so try to handle that here. In all cases we need to
5885 // pass the matched shift from the opposite side to compute the opcode and
5886 // needed shift amount to extract. We still want to do this if both sides
5887 // matched a rotate half because one half may be a potential overshift that
5888 // can be broken down (ie if InstCombine merged two shl or srl ops into a
5889 // single one).
5891 // Have LHS side of the rotate, try to extract the needed shift from the RHS.
5892 if (LHSShift)
5893 if (SDValue NewRHSShift =
5894 extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
5895 RHSShift = NewRHSShift;
5896 // Have RHS side of the rotate, try to extract the needed shift from the LHS.
5897 if (RHSShift)
5898 if (SDValue NewLHSShift =
5899 extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
5900 LHSShift = NewLHSShift;
5902 // If a side is still missing, nothing else we can do.
5903 if (!RHSShift || !LHSShift)
5904 return nullptr;
5906 // At this point we've matched or extracted a shift op on each side.
5908 if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
5909 return nullptr; // Not shifting the same value.
5911 if (LHSShift.getOpcode() == RHSShift.getOpcode())
5912 return nullptr; // Shifts must disagree.
5914 // Canonicalize shl to left side in a shl/srl pair.
5915 if (RHSShift.getOpcode() == ISD::SHL) {
5916 std::swap(LHS, RHS);
5917 std::swap(LHSShift, RHSShift);
5918 std::swap(LHSMask, RHSMask);
5921 unsigned EltSizeInBits = VT.getScalarSizeInBits();
5922 SDValue LHSShiftArg = LHSShift.getOperand(0);
5923 SDValue LHSShiftAmt = LHSShift.getOperand(1);
5924 SDValue RHSShiftArg = RHSShift.getOperand(0);
5925 SDValue RHSShiftAmt = RHSShift.getOperand(1);
5927 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
5928 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
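// e.g. on i32: (or (shl x, 8), (srl x, 24)) --> (rotl x, 8), because
// 8 + 24 == 32.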
5929 auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
5930 ConstantSDNode *RHS) {
5931 return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
5933 if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
5934 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
5935 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
5937 // If there is an AND of either shifted operand, apply it to the result.
5938 if (LHSMask.getNode() || RHSMask.getNode()) {
5939 SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
5940 SDValue Mask = AllOnes;
5942 if (LHSMask.getNode()) {
5943 SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
5944 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5945 DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
5947 if (RHSMask.getNode()) {
5948 SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
5949 Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
5950 DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
5953 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
5956 return Rot.getNode();
5959 // If there is a mask here, and we have a variable shift, we can't be sure
5960 // that we're masking out the right stuff.
5961 if (LHSMask.getNode() || RHSMask.getNode())
5962 return nullptr;
5964 // If the shift amount is sign/zext/any-extended just peel it off.
5965 SDValue LExtOp0 = LHSShiftAmt;
5966 SDValue RExtOp0 = RHSShiftAmt;
5967 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5968 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5969 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5970 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
5971 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
5972 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
5973 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
5974 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
5975 LExtOp0 = LHSShiftAmt.getOperand(0);
5976 RExtOp0 = RHSShiftAmt.getOperand(0);
5979 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
5980 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
5981 if (TryL)
5982 return TryL;
5984 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
5985 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
5986 if (TryR)
5987 return TryR;
5989 return nullptr;
5992 namespace {
5994 /// Represents the known origin of an individual byte in a load combine pattern. The
5995 /// value of the byte is either constant zero or comes from memory.
5996 struct ByteProvider {
5997 // For constant zero providers Load is set to nullptr. For memory providers
5998 // Load represents the node which loads the byte from memory.
5999 // ByteOffset is the offset of the byte in the value produced by the load.
6000 LoadSDNode *Load = nullptr;
6001 unsigned ByteOffset = 0;
6003 ByteProvider() = default;
6005 static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6006 return ByteProvider(Load, ByteOffset);
6009 static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6011 bool isConstantZero() const { return !Load; }
6012 bool isMemory() const { return Load; }
6014 bool operator==(const ByteProvider &Other) const {
6015 return Other.Load == Load && Other.ByteOffset == ByteOffset;
6018 private:
6019 ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6020 : Load(Load), ByteOffset(ByteOffset) {}
6023 } // end anonymous namespace
6025 /// Recursively traverses the expression, calculating the origin of the requested
6026 /// byte of the given value. Returns None if the provider can't be calculated.
6028 /// For every value except the root of the expression, verifies that it has
6029 /// exactly one use; if not, returns None. This way, if the origin of the
6030 /// byte is returned, it is guaranteed that the values which contribute to
6031 /// the byte are not used outside of this expression.
6033 /// Because the parts of the expression are not allowed to have more than one
6034 /// use, this function iterates over trees, not DAGs. So it never visits the same
6035 /// node more than once.
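/// For example (a hypothetical query), requesting byte 1 of
///   (or (zext i8->i32 (load p)), (shl (zext i8->i32 (load q)), 8))
/// returns ByteProvider::getMemory(load q, 0): the OR recurses into both
/// sides, the zext of (load p) provides constant zero past its narrow
/// width, and the shl by 8 shifts the request down to byte 0 of (load q).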
6036 static const Optional<ByteProvider>
6037 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6038 bool Root = false) {
6039 // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6040 if (Depth == 10)
6041 return None;
6043 if (!Root && !Op.hasOneUse())
6044 return None;
6046 assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6047 unsigned BitWidth = Op.getValueSizeInBits();
6048 if (BitWidth % 8 != 0)
6049 return None;
6050 unsigned ByteWidth = BitWidth / 8;
6051 assert(Index < ByteWidth && "invalid index requested");
6052 (void) ByteWidth;
6054 switch (Op.getOpcode()) {
6055 case ISD::OR: {
6056 auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6057 if (!LHS)
6058 return None;
6059 auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6060 if (!RHS)
6061 return None;
6063 if (LHS->isConstantZero())
6064 return RHS;
6065 if (RHS->isConstantZero())
6066 return LHS;
6067 return None;
6069 case ISD::SHL: {
6070 auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6071 if (!ShiftOp)
6072 return None;
6074 uint64_t BitShift = ShiftOp->getZExtValue();
6075 if (BitShift % 8 != 0)
6076 return None;
6077 uint64_t ByteShift = BitShift / 8;
6079 return Index < ByteShift
6080 ? ByteProvider::getConstantZero()
6081 : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6082 Depth + 1);
6084 case ISD::ANY_EXTEND:
6085 case ISD::SIGN_EXTEND:
6086 case ISD::ZERO_EXTEND: {
6087 SDValue NarrowOp = Op->getOperand(0);
6088 unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6089 if (NarrowBitWidth % 8 != 0)
6090 return None;
6091 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6093 if (Index >= NarrowByteWidth)
6094 return Op.getOpcode() == ISD::ZERO_EXTEND
6095 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6096 : None;
6097 return calculateByteProvider(NarrowOp, Index, Depth + 1);
6099 case ISD::BSWAP:
6100 return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6101 Depth + 1);
6102 case ISD::LOAD: {
6103 auto L = cast<LoadSDNode>(Op.getNode());
6104 if (L->isVolatile() || L->isIndexed())
6105 return None;
6107 unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6108 if (NarrowBitWidth % 8 != 0)
6109 return None;
6110 uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6112 if (Index >= NarrowByteWidth)
6113 return L->getExtensionType() == ISD::ZEXTLOAD
6114 ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6115 : None;
6116 return ByteProvider::getMemory(L, Index);
6120 return None;
6123 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
6124 return i;
6127 static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
6128 return BW - i - 1;
6131 // Check if the byte offsets we are looking at match either a big- or a
6132 // little-endian value load. Return true for big endian, false for little
6133 // endian, and None if the match failed.
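// For example, width-4 offsets {0, 1, 2, 3} (relative to FirstOffset)
// match little endian, {3, 2, 1, 0} match big endian, and {0, 2, 1, 3}
// match neither, so None is returned.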
6134 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6135 int64_t FirstOffset) {
6136 // Endianness can only be determined when the value is at least 2 bytes wide.
6137 unsigned Width = ByteOffsets.size();
6138 if (Width < 2)
6139 return None;
6141 bool BigEndian = true, LittleEndian = true;
6142 for (unsigned i = 0; i < Width; i++) {
6143 int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6144 LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6145 BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6146 if (!BigEndian && !LittleEndian)
6147 return None;
6150 assert((BigEndian != LittleEndian) && "It should be either big endian or "
6151 "little endian");
6152 return BigEndian;
6155 /// Match a pattern where a wide type scalar value is loaded by several narrow
6156 /// loads and combined by shifts and ors. Fold it into a single load or a load
6157 /// and a BSWAP if the target supports it.
6159 /// Assuming little endian target:
6160 /// i8 *a = ...
6161 /// i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6162 /// =>
6163 /// i32 val = *((i32)a)
6165 /// i8 *a = ...
6166 /// i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6167 /// =>
6168 /// i32 val = BSWAP(*((i32)a))
6170 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6171 /// interact well with the worklist mechanism. When a part of the pattern is
6172 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6173 /// but the root node of the pattern which triggers the load combine is not
6174 /// necessarily a direct user of the changed node. For example, once the address
6175 /// of the t28 load is reassociated, the load combine won't be triggered:
6176 /// t25: i32 = add t4, Constant:i32<2>
6177 /// t26: i64 = sign_extend t25
6178 /// t27: i64 = add t2, t26
6179 /// t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6180 /// t29: i32 = zero_extend t28
6181 /// t32: i32 = shl t29, Constant:i8<8>
6182 /// t33: i32 = or t23, t32
6183 /// As a possible fix visitLoad can check if the load can be a part of a load
6184 /// combine pattern and add corresponding OR roots to the worklist.
6185 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6186 assert(N->getOpcode() == ISD::OR &&
6187 "Can only match load combining against OR nodes");
6189 // Handles simple types only
6190 EVT VT = N->getValueType(0);
6191 if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6192 return SDValue();
6193 unsigned ByteWidth = VT.getSizeInBits() / 8;
6195 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6196 // Before legalization we can introduce illegal loads that are too wide and
6197 // will later be split into legal-sized loads. This enables us to combine
6198 // i64-load-by-i8 patterns into a couple of i32 loads on 32-bit targets.
6199 if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6200 return SDValue();
6202 bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6203 auto MemoryByteOffset = [&] (ByteProvider P) {
6204 assert(P.isMemory() && "Must be a memory byte provider");
6205 unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6206 assert(LoadBitWidth % 8 == 0 &&
6207 "can only analyze providers for individual bytes not bit");
6208 unsigned LoadByteWidth = LoadBitWidth / 8;
6209 return IsBigEndianTarget
6210 ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6211 : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6214 Optional<BaseIndexOffset> Base;
6215 SDValue Chain;
6217 SmallPtrSet<LoadSDNode *, 8> Loads;
6218 Optional<ByteProvider> FirstByteProvider;
6219 int64_t FirstOffset = INT64_MAX;
6221 // Check if all the bytes of the OR we are looking at are loaded from the same
6222 // base address. Collect byte offsets from the Base address in ByteOffsets.
6223 SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6224 for (unsigned i = 0; i < ByteWidth; i++) {
6225 auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6226 if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6227 return SDValue();
6229 LoadSDNode *L = P->Load;
6230 assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
6231 "Must be enforced by calculateByteProvider");
6232 assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6234 // All loads must share the same chain
6235 SDValue LChain = L->getChain();
6236 if (!Chain)
6237 Chain = LChain;
6238 else if (Chain != LChain)
6239 return SDValue();
6241 // Loads must share the same base address
6242 BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6243 int64_t ByteOffsetFromBase = 0;
6244 if (!Base)
6245 Base = Ptr;
6246 else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6247 return SDValue();
6249 // Calculate the offset of the current byte from the base address
6250 ByteOffsetFromBase += MemoryByteOffset(*P);
6251 ByteOffsets[i] = ByteOffsetFromBase;
6253 // Remember the first byte load
6254 if (ByteOffsetFromBase < FirstOffset) {
6255 FirstByteProvider = P;
6256 FirstOffset = ByteOffsetFromBase;
6259 Loads.insert(L);
6261 assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6262 "memory, so there must be at least one load which produces the value");
6263 assert(Base && "Base address of the accessed memory location must be set");
6264 assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6266 // Check if the bytes of the OR we are looking at match with either big or
6267 // little endian value load
6268 Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6269 if (!IsBigEndian.hasValue())
6270 return SDValue();
6272 assert(FirstByteProvider && "must be set");
6274 // Ensure that the first byte is loaded from offset zero of the first load,
6275 // so the combined value can be loaded from the first load's address.
6276 if (MemoryByteOffset(*FirstByteProvider) != 0)
6277 return SDValue();
6278 LoadSDNode *FirstLoad = FirstByteProvider->Load;
6280 // The node we are looking at matches with the pattern, check if we can
6281 // replace it with a single load and bswap if needed.
6283 // If the load needs a byte swap, check if the target supports it
6284 bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6286 // Before legalization we can introduce illegal bswaps which will later be
6287 // converted to an explicit bswap sequence. This way we end up with a single
6288 // load and byte shuffling instead of several loads and byte shuffling.
6289 if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6290 return SDValue();
6292 // Check that a load of the wide type is both allowed and fast on the target
6293 bool Fast = false;
6294 bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6295 VT, FirstLoad->getAddressSpace(),
6296 FirstLoad->getAlignment(), &Fast);
6297 if (!Allowed || !Fast)
6298 return SDValue();
6300 SDValue NewLoad =
6301 DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6302 FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6304 // Transfer chain users from old loads to the new load.
6305 for (LoadSDNode *L : Loads)
6306 DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6308 return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6311 // If the target has andn, bsl, or a similar bit-select instruction,
6312 // we want to unfold masked merge, with canonical pattern of:
6313 // | A | |B|
6314 // ((x ^ y) & m) ^ y
6315 // | D |
6316 // Into:
6317 // (x & m) | (y & ~m)
6318 // If y is a constant, and the 'andn' does not work with immediates,
6319 // we unfold into a different pattern:
6320 // ~(~x & m) & (m | y)
6321 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6322 // the very least that breaks andnpd / andnps patterns, and because those
6323 // patterns are simplified in IR and shouldn't be created in the DAG
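// A concrete sketch of the unfold with hypothetical i8 values: x = 0xAA,
// y = 0x55, m = 0xF0. Then ((x ^ y) & m) ^ y == ((0xFF & 0xF0) ^ 0x55)
// == 0xA5, and (x & m) | (y & ~m) == 0xA0 | 0x05 == 0xA5; in both forms
// the high nibble is selected from x and the low nibble from y.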
6324 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6325 assert(N->getOpcode() == ISD::XOR);
6327 // Don't touch 'not' (i.e. where y = -1).
6328 if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6329 return SDValue();
6331 EVT VT = N->getValueType(0);
6333 // There are 3 commutable operators in the pattern,
6334 // so we have to deal with 8 possible variants of the basic pattern.
6335 SDValue X, Y, M;
6336 auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
6337 if (And.getOpcode() != ISD::AND || !And.hasOneUse())
6338 return false;
6339 SDValue Xor = And.getOperand(XorIdx);
6340 if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
6341 return false;
6342 SDValue Xor0 = Xor.getOperand(0);
6343 SDValue Xor1 = Xor.getOperand(1);
6344 // Don't touch 'not' (i.e. where y = -1).
6345 if (isAllOnesOrAllOnesSplat(Xor1))
6346 return false;
6347 if (Other == Xor0)
6348 std::swap(Xor0, Xor1);
6349 if (Other != Xor1)
6350 return false;
6351 X = Xor0;
6352 Y = Xor1;
6353 M = And.getOperand(XorIdx ? 0 : 1);
6354 return true;
6357 SDValue N0 = N->getOperand(0);
6358 SDValue N1 = N->getOperand(1);
6359 if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
6360 !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
6361 return SDValue();
6363 // Don't do anything if the mask is constant. This should not be reachable.
6364 // InstCombine should have already unfolded this pattern, and DAGCombiner
6365 // probably shouldn't produce it either.
6366 if (isa<ConstantSDNode>(M.getNode()))
6367 return SDValue();
6369 // We can transform if the target has AndNot
6370 if (!TLI.hasAndNot(M))
6371 return SDValue();
6373 SDLoc DL(N);
6375 // If Y is a constant, check that 'andn' works with immediates.
6376 if (!TLI.hasAndNot(Y)) {
6377 assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
6378 // If not, we need to do a bit more work to make sure andn is still used.
6379 SDValue NotX = DAG.getNOT(DL, X, VT);
6380 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
6381 SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
6382 SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
6383 return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
6386 SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
6387 SDValue NotM = DAG.getNOT(DL, M, VT);
6388 SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
6390 return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
6393 SDValue DAGCombiner::visitXOR(SDNode *N) {
6394 SDValue N0 = N->getOperand(0);
6395 SDValue N1 = N->getOperand(1);
6396 EVT VT = N0.getValueType();
6398 // fold vector ops
6399 if (VT.isVector()) {
6400 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6401 return FoldedVOp;
6403 // fold (xor x, 0) -> x, vector edition
6404 if (ISD::isBuildVectorAllZeros(N0.getNode()))
6405 return N1;
6406 if (ISD::isBuildVectorAllZeros(N1.getNode()))
6407 return N0;
6410 // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6411 SDLoc DL(N);
6412 if (N0.isUndef() && N1.isUndef())
6413 return DAG.getConstant(0, DL, VT);
6414 // fold (xor x, undef) -> undef
6415 if (N0.isUndef())
6416 return N0;
6417 if (N1.isUndef())
6418 return N1;
6419 // fold (xor c1, c2) -> c1^c2
6420 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6421 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6422 if (N0C && N1C)
6423 return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6424 // canonicalize constant to RHS
6425 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6426 !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6427 return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6428 // fold (xor x, 0) -> x
6429 if (isNullConstant(N1))
6430 return N0;
6432 if (SDValue NewSel = foldBinOpIntoSelect(N))
6433 return NewSel;
6435 // reassociate xor
6436 if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6437 return RXOR;
6439 // fold !(x cc y) -> (x !cc y)
6440 unsigned N0Opcode = N0.getOpcode();
6441 SDValue LHS, RHS, CC;
6442 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6443 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6444 LHS.getValueType().isInteger());
6445 if (!LegalOperations ||
6446 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6447 switch (N0Opcode) {
6448 default:
6449 llvm_unreachable("Unhandled SetCC Equivalent!");
6450 case ISD::SETCC:
6451 return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6452 case ISD::SELECT_CC:
6453 return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6454 N0.getOperand(3), NotCC);
6459 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
6460 if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6461 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6462 SDValue V = N0.getOperand(0);
6463 SDLoc DL0(N0);
6464 V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6465 DAG.getConstant(1, DL0, V.getValueType()));
6466 AddToWorklist(V.getNode());
6467 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6470 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
6471 if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6472 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6473 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6474 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
6475 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6476 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6477 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6478 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6479 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6482 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
6483 if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6484 (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6485 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
6486 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
6487 unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6488 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
6489 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
6490 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
6491 return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
6494 // fold (xor (and x, y), y) -> (and (not x), y)
6495 if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6496 SDValue X = N0.getOperand(0);
6497 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6498 AddToWorklist(NotX.getNode());
6499 return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6502 if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6503 ConstantSDNode *XorC = isConstOrConstSplat(N1);
6504 ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6505 unsigned BitWidth = VT.getScalarSizeInBits();
6506 if (XorC && ShiftC) {
6507 // Don't crash on an oversized shift. We cannot guarantee that a bogus
6508 // shift has been simplified to undef.
6509 uint64_t ShiftAmt = ShiftC->getLimitedValue();
6510 if (ShiftAmt < BitWidth) {
6511 APInt Ones = APInt::getAllOnesValue(BitWidth);
6512 Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6513 if (XorC->getAPIntValue() == Ones) {
6514 // If the xor constant is a shifted -1, do a 'not' before the shift:
6515 // xor (X << ShiftC), XorC --> (not X) << ShiftC
6516 // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6517 SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6518 return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6524 // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
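// A quick check with a hypothetical i8 value X = -5 (0xFB):
// Y = sra(X, 7) = -1 (0xFF); add(X, Y) = -6 (0xFA);
// xor(0xFA, 0xFF) = 0x05, which is abs(-5) as expected.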
6525 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6526 SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
6527 SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
6528 if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
6529 SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
6530 SDValue S0 = S.getOperand(0);
6531 if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
6532 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6533 if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
6534 if (C->getAPIntValue() == (OpSizeInBits - 1))
6535 return DAG.getNode(ISD::ABS, DL, VT, S0);
6540 // fold (xor x, x) -> 0
6541 if (N0 == N1)
6542 return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
6544 // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
6545 // Here is a concrete example of this equivalence:
6546 // i16 x == 14
6547 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
6548 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
6550 // =>
6552 // i16 ~1 == 0b1111111111111110
6553 // i16 rol(~1, 14) == 0b1011111111111111
6555 // Some additional tips to help conceptualize this transform:
6556 // - Try to see the operation as placing a single zero in a value of all ones.
6557 // - There exists no value for x which would allow the result to contain zero.
6558 // - Values of x larger than the bitwidth are undefined and do not require a
6559 // consistent result.
6560 // - Pushing the zero left requires shifting one-bits in from the right.
6561 // A rotate left of ~1 is a nice way of achieving the desired result.
6562 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
6563 isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
6564 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
6565 N0.getOperand(1));
6568 // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
6569 if (N0Opcode == N1.getOpcode())
6570 if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6571 return V;
6573 // Unfold ((x ^ y) & m) ^ y into (x & m) | (y & ~m) if profitable
6574 if (SDValue MM = unfoldMaskedMerge(N))
6575 return MM;
6577 // Simplify the expression using non-local knowledge.
6578 if (SimplifyDemandedBits(SDValue(N, 0)))
6579 return SDValue(N, 0);
6581 return SDValue();
6584 /// Handle transforms common to the three shifts, when the shift amount is a
6585 /// constant.
6586 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
6587 // Do not turn a 'not' into a regular xor.
6588 if (isBitwiseNot(N->getOperand(0)))
6589 return SDValue();
6591 SDNode *LHS = N->getOperand(0).getNode();
6592 if (!LHS->hasOneUse()) return SDValue();
6594 // We want to pull some binops through shifts, so that we have (and (shift))
6595 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
6596 // thing happens with address calculations, so it's important to canonicalize
6597 // it.
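// As an illustrative sketch (hypothetical operands): (shl (add X, 5), 3),
// where X is itself a shift by a constant, becomes
// (add (shl X, 3), 5 << 3), exposing the shift-of-shift and the shifted
// constant 40 to further folds.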
6598 bool HighBitSet = false; // Can we transform this if the high bit is set?
6600 switch (LHS->getOpcode()) {
6601 default: return SDValue();
6602 case ISD::OR:
6603 case ISD::XOR:
6604 HighBitSet = false; // We can only transform sra if the high bit is clear.
6605 break;
6606 case ISD::AND:
6607 HighBitSet = true; // We can only transform sra if the high bit is set.
6608 break;
6609 case ISD::ADD:
6610 if (N->getOpcode() != ISD::SHL)
6611 return SDValue(); // only shl(add) not sr[al](add).
6612 HighBitSet = false; // We can only transform sra if the high bit is clear.
6613 break;
6616 // We require the RHS of the binop to be a constant and not opaque as well.
6617 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
6618 if (!BinOpCst) return SDValue();
6620 // FIXME: disable this unless the input to the binop is a shift by a constant
6621 // or is a copy/select. Enable this in other cases once we figure out when it is exactly profitable.
6622 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
6623 bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
6624 BinOpLHSVal->getOpcode() == ISD::SRA ||
6625 BinOpLHSVal->getOpcode() == ISD::SRL;
6626 bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
6627 BinOpLHSVal->getOpcode() == ISD::SELECT;
6629 if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
6630 !isCopyOrSelect)
6631 return SDValue();
6633 if (isCopyOrSelect && N->hasOneUse())
6634 return SDValue();
6636 EVT VT = N->getValueType(0);
6638 // If this is a signed shift right, and the high bit is modified by the
6639 // logical operation, do not perform the transformation. The HighBitSet
6640 // boolean indicates the value of the high bit of the constant which would
6641 // cause it to be modified for this operation.
6642 if (N->getOpcode() == ISD::SRA) {
6643 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
6644 if (BinOpRHSSignSet != HighBitSet)
6645 return SDValue();
6648 if (!TLI.isDesirableToCommuteWithShift(N, Level))
6649 return SDValue();
6651 // Fold the constants, shifting the binop RHS by the shift amount.
6652 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
6653 N->getValueType(0),
6654 LHS->getOperand(1), N->getOperand(1));
6655 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
6657 // Create the new shift.
6658 SDValue NewShift = DAG.getNode(N->getOpcode(),
6659 SDLoc(LHS->getOperand(0)),
6660 VT, LHS->getOperand(0), N->getOperand(1));
6662 // Create the new binop.
6663 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
6666 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
6667 assert(N->getOpcode() == ISD::TRUNCATE);
6668 assert(N->getOperand(0).getOpcode() == ISD::AND);
6670 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
6671 EVT TruncVT = N->getValueType(0);
6672 if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
6673 TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
6674 SDValue N01 = N->getOperand(0).getOperand(1);
6675 if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
6676 SDLoc DL(N);
6677 SDValue N00 = N->getOperand(0).getOperand(0);
6678 SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
6679 SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
6680 AddToWorklist(Trunc00.getNode());
6681 AddToWorklist(Trunc01.getNode());
6682 return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
6686 return SDValue();
6689 SDValue DAGCombiner::visitRotate(SDNode *N) {
6690 SDLoc dl(N);
6691 SDValue N0 = N->getOperand(0);
6692 SDValue N1 = N->getOperand(1);
6693 EVT VT = N->getValueType(0);
6694 unsigned Bitsize = VT.getScalarSizeInBits();
6696 // fold (rot x, 0) -> x
6697 if (isNullOrNullSplat(N1))
6698 return N0;
6700 // fold (rot x, c) -> x iff (c % BitSize) == 0
6701 if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
6702 APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
6703 if (DAG.MaskedValueIsZero(N1, ModuloMask))
6704 return N0;
6707 // fold (rot x, c) -> (rot x, c % BitSize)
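// E.g. with a hypothetical i8 rotate: (rotl x, 11) -> (rotl x, 3), since
// rotating by a multiple of the bitwidth is the identity.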
6708 if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
6709 if (Cst->getAPIntValue().uge(Bitsize)) {
6710 uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
6711 return DAG.getNode(N->getOpcode(), dl, VT, N0,
6712 DAG.getConstant(RotAmt, dl, N1.getValueType()));
6716 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
6717 if (N1.getOpcode() == ISD::TRUNCATE &&
6718 N1.getOperand(0).getOpcode() == ISD::AND) {
6719 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6720 return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
6723 unsigned NextOp = N0.getOpcode();
6724 // fold (rot* (rot* x, c2), c1) -> (rot* x, (c1 +- c2) % bitsize)
6725 if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
6726 SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
6727 SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
6728 if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
6729 EVT ShiftVT = C1->getValueType(0);
6730 bool SameSide = (N->getOpcode() == NextOp);
6731 unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
6732 if (SDValue CombinedShift =
6733 DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
6734 SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
6735 SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
6736 ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
6737 BitsizeC.getNode());
6738 return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
6739 CombinedShiftNorm);
6743 return SDValue();
6746 SDValue DAGCombiner::visitSHL(SDNode *N) {
6747 SDValue N0 = N->getOperand(0);
6748 SDValue N1 = N->getOperand(1);
6749 if (SDValue V = DAG.simplifyShift(N0, N1))
6750 return V;
6752 EVT VT = N0.getValueType();
6753 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6755 // fold vector ops
6756 if (VT.isVector()) {
6757 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6758 return FoldedVOp;
6760 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
6761 // If setcc produces all-one true value then:
6762 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
6763 if (N1CV && N1CV->isConstant()) {
6764 if (N0.getOpcode() == ISD::AND) {
6765 SDValue N00 = N0->getOperand(0);
6766 SDValue N01 = N0->getOperand(1);
6767 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
6769 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
6770 TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
6771 TargetLowering::ZeroOrNegativeOneBooleanContent) {
6772 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
6773 N01CV, N1CV))
6774 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
6780 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6782 // fold (shl c1, c2) -> c1<<c2
6783 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6784 if (N0C && N1C && !N1C->isOpaque())
6785 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
6787 if (SDValue NewSel = foldBinOpIntoSelect(N))
6788 return NewSel;
6790 // if (shl x, c) is known to be zero, return 0
6791 if (DAG.MaskedValueIsZero(SDValue(N, 0),
6792 APInt::getAllOnesValue(OpSizeInBits)))
6793 return DAG.getConstant(0, SDLoc(N), VT);
6794 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
6795 if (N1.getOpcode() == ISD::TRUNCATE &&
6796 N1.getOperand(0).getOpcode() == ISD::AND) {
6797 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
6798 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
6801 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
6802 return SDValue(N, 0);
6804 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
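// Illustrative sketches with hypothetical i8 shifts: (shl (shl x, 3), 2)
// -> (shl x, 5), while (shl (shl x, 6), 4) -> 0 because the combined
// amount (10) reaches or exceeds the bitwidth.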
6805 if (N0.getOpcode() == ISD::SHL) {
6806 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
6807 ConstantSDNode *RHS) {
6808 APInt c1 = LHS->getAPIntValue();
6809 APInt c2 = RHS->getAPIntValue();
6810 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6811 return (c1 + c2).uge(OpSizeInBits);
6813 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
6814 return DAG.getConstant(0, SDLoc(N), VT);
6816 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
6817 ConstantSDNode *RHS) {
6818 APInt c1 = LHS->getAPIntValue();
6819 APInt c2 = RHS->getAPIntValue();
6820 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6821 return (c1 + c2).ult(OpSizeInBits);
6823 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
6824 SDLoc DL(N);
6825 EVT ShiftVT = N1.getValueType();
6826 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
6827 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
6831 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
6832 // For this to be valid, the second form must not preserve any of the bits
6833 // that are shifted out by the inner shift in the first form. This means
6834 // the outer shift size must be >= the number of bits added by the ext.
6835 // As a corollary, we don't care what kind of ext it is.
6836 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
6837 N0.getOpcode() == ISD::ANY_EXTEND ||
6838 N0.getOpcode() == ISD::SIGN_EXTEND) &&
6839 N0.getOperand(0).getOpcode() == ISD::SHL) {
6840 SDValue N0Op0 = N0.getOperand(0);
6841 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6842 APInt c1 = N0Op0C1->getAPIntValue();
6843 APInt c2 = N1C->getAPIntValue();
6844 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
6846 EVT InnerShiftVT = N0Op0.getValueType();
6847 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
6848 if (c2.uge(OpSizeInBits - InnerShiftSize)) {
6849 SDLoc DL(N0);
6850 APInt Sum = c1 + c2;
6851 if (Sum.uge(OpSizeInBits))
6852 return DAG.getConstant(0, DL, VT);
6854 return DAG.getNode(
6855 ISD::SHL, DL, VT,
6856 DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
6857 DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
6862 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
6863 // Only fold this if the inner zext has no other uses to avoid increasing
6864 // the total number of instructions.
6865 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6866 N0.getOperand(0).getOpcode() == ISD::SRL) {
6867 SDValue N0Op0 = N0.getOperand(0);
6868 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
6869 if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
6870 uint64_t c1 = N0Op0C1->getZExtValue();
6871 uint64_t c2 = N1C->getZExtValue();
6872 if (c1 == c2) {
6873 SDValue NewOp0 = N0.getOperand(0);
6874 EVT CountVT = NewOp0.getOperand(1).getValueType();
6875 SDLoc DL(N);
6876 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
6877 NewOp0,
6878 DAG.getConstant(c2, DL, CountVT));
6879 AddToWorklist(NewSHL.getNode());
6880 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
6886 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
6887 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
6888 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
6889 N0->getFlags().hasExact()) {
6890 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6891 uint64_t C1 = N0C1->getZExtValue();
6892 uint64_t C2 = N1C->getZExtValue();
6893 SDLoc DL(N);
6894 if (C1 <= C2)
6895 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6896 DAG.getConstant(C2 - C1, DL, N1.getValueType()));
6897 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
6898 DAG.getConstant(C1 - C2, DL, N1.getValueType()));
6902 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
6903 // (and (srl x, (sub c1, c2)), MASK)
6904 // Only fold this if the inner shift has no other uses -- if it does, folding
6905 // this will increase the total number of instructions.
6906 // TODO - drop hasOneUse requirement if c1 == c2?
6907 // TODO - support non-uniform vector shift amounts.
6908 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
6909 TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
6910 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
6911 if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
6912 uint64_t c1 = N0C1->getZExtValue();
6913 uint64_t c2 = N1C->getZExtValue();
6914 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
6915 SDValue Shift;
6916 if (c2 > c1) {
6917 Mask <<= c2 - c1;
6918 SDLoc DL(N);
6919 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
6920 DAG.getConstant(c2 - c1, DL, N1.getValueType()));
6921 } else {
6922 Mask.lshrInPlace(c1 - c2);
6923 SDLoc DL(N);
6924 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
6925 DAG.getConstant(c1 - c2, DL, N1.getValueType()));
6927 SDLoc DL(N0);
6928 return DAG.getNode(ISD::AND, DL, VT, Shift,
6929 DAG.getConstant(Mask, DL, VT));
6934 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
6935 if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
6936 isConstantOrConstantVector(N1, /* No Opaques */ true)) {
6937 SDLoc DL(N);
6938 SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
6939 SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
6940 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
6943 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
6944 // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
6945 // Variant of version done on multiply, except mul by a power of 2 is turned
6946 // into a shift.
6947 if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
6948 N0.getNode()->hasOneUse() &&
6949 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6950 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
6951 TLI.isDesirableToCommuteWithShift(N, Level)) {
6952 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
6953 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6954 AddToWorklist(Shl0.getNode());
6955 AddToWorklist(Shl1.getNode());
6956 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
6959 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
6960 if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
6961 isConstantOrConstantVector(N1, /* No Opaques */ true) &&
6962 isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
6963 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
6964 if (isConstantOrConstantVector(Shl))
6965 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
6968 if (N1C && !N1C->isOpaque())
6969 if (SDValue NewSHL = visitShiftByConstant(N, N1C))
6970 return NewSHL;
6972 return SDValue();
6975 SDValue DAGCombiner::visitSRA(SDNode *N) {
6976 SDValue N0 = N->getOperand(0);
6977 SDValue N1 = N->getOperand(1);
6978 if (SDValue V = DAG.simplifyShift(N0, N1))
6979 return V;
6981 EVT VT = N0.getValueType();
6982 unsigned OpSizeInBits = VT.getScalarSizeInBits();
6984 // Arithmetic shifting an all-sign-bit value is a no-op.
6985 // fold (sra 0, x) -> 0
6986 // fold (sra -1, x) -> -1
6987 if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
6988 return N0;
6990 // fold vector ops
6991 if (VT.isVector())
6992 if (SDValue FoldedVOp = SimplifyVBinOp(N))
6993 return FoldedVOp;
6995 ConstantSDNode *N1C = isConstOrConstSplat(N1);
6997 // fold (sra c1, c2) -> c1 >>s c2
6998 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6999 if (N0C && N1C && !N1C->isOpaque())
7000 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7002 if (SDValue NewSel = foldBinOpIntoSelect(N))
7003 return NewSel;
7005 // fold (sra (shl x, c1), c1) -> sext_inreg, for some c1, if the target
7006 // supports sext_inreg.
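// For instance (a hypothetical i32 example): (sra (shl x, 24), 24)
// sign-extends the low 8 bits of x, i.e. a sext_inreg of x from i8.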
7007 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7008 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7009 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7010 if (VT.isVector())
7011 ExtVT = EVT::getVectorVT(*DAG.getContext(),
7012 ExtVT, VT.getVectorNumElements());
7013 if ((!LegalOperations ||
7014 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7015 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7016 N0.getOperand(0), DAG.getValueType(ExtVT));
7019 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7020 // clamp (add c1, c2) to max shift.
7021 if (N0.getOpcode() == ISD::SRA) {
7022 SDLoc DL(N);
7023 EVT ShiftVT = N1.getValueType();
7024 EVT ShiftSVT = ShiftVT.getScalarType();
7025 SmallVector<SDValue, 16> ShiftValues;
7027 auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7028 APInt c1 = LHS->getAPIntValue();
7029 APInt c2 = RHS->getAPIntValue();
7030 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7031 APInt Sum = c1 + c2;
7032 unsigned ShiftSum =
7033 Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7034 ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7035 return true;
7037 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7038 SDValue ShiftValue;
7039 if (VT.isVector())
7040 ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7041 else
7042 ShiftValue = ShiftValues[0];
7043 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7047 // fold (sra (shl X, m), (sub result_size, n))
7048 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7049 // result_size - n != m.
7050 // If truncate is free for the target, sext(shl) is likely to result in
7051 // better code.
7052 if (N0.getOpcode() == ISD::SHL && N1C) {
7053 // Get the two constants of the shifts, CN0 = m, CN = n.
7054 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7055 if (N01C) {
7056 LLVMContext &Ctx = *DAG.getContext();
7057 // Determine what the truncate's result bitsize and type would be.
7058 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7060 if (VT.isVector())
7061 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7063 // Determine the residual right-shift amount.
7064 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7066 // If the shift is not a no-op (in which case this should be just a sign
7067 // extend already), the truncated-to type is legal, sign_extend is legal
7068 // on that type, and the truncate to that type is both legal and free,
7069 // perform the transform.
7070 if ((ShiftAmt > 0) &&
7071 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7072 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7073 TLI.isTruncateFree(VT, TruncVT)) {
7074 SDLoc DL(N);
7075 SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7076 getShiftAmountTy(N0.getOperand(0).getValueType()));
7077 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7078 N0.getOperand(0), Amt);
7079 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7080 Shift);
7081 return DAG.getNode(ISD::SIGN_EXTEND, DL,
7082 N->getValueType(0), Trunc);
7087 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7088 if (N1.getOpcode() == ISD::TRUNCATE &&
7089 N1.getOperand(0).getOpcode() == ISD::AND) {
7090 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7091 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7094 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7095 // if c1 is equal to the number of bits the trunc removes
7096 if (N0.getOpcode() == ISD::TRUNCATE &&
7097 (N0.getOperand(0).getOpcode() == ISD::SRL ||
7098 N0.getOperand(0).getOpcode() == ISD::SRA) &&
7099 N0.getOperand(0).hasOneUse() &&
7100 N0.getOperand(0).getOperand(1).hasOneUse() &&
7101 N1C) {
7102 SDValue N0Op0 = N0.getOperand(0);
7103 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7104 unsigned LargeShiftVal = LargeShift->getZExtValue();
7105 EVT LargeVT = N0Op0.getValueType();
7107 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
7108 SDLoc DL(N);
7109 SDValue Amt =
7110 DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
7111 getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
7112 SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
7113 N0Op0.getOperand(0), Amt);
7114 return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7119 // Simplify, based on bits shifted out of the LHS.
7120 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7121 return SDValue(N, 0);
7123 // If the sign bit is known to be zero, switch this to a SRL.
7124 if (DAG.SignBitIsZero(N0))
7125 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7127 if (N1C && !N1C->isOpaque())
7128 if (SDValue NewSRA = visitShiftByConstant(N, N1C))
7129 return NewSRA;
7131 return SDValue();
7134 SDValue DAGCombiner::visitSRL(SDNode *N) {
7135 SDValue N0 = N->getOperand(0);
7136 SDValue N1 = N->getOperand(1);
7137 if (SDValue V = DAG.simplifyShift(N0, N1))
7138 return V;
7140 EVT VT = N0.getValueType();
7141 unsigned OpSizeInBits = VT.getScalarSizeInBits();
7143 // fold vector ops
7144 if (VT.isVector())
7145 if (SDValue FoldedVOp = SimplifyVBinOp(N))
7146 return FoldedVOp;
7148 ConstantSDNode *N1C = isConstOrConstSplat(N1);
7150 // fold (srl c1, c2) -> c1 >>u c2
7151 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7152 if (N0C && N1C && !N1C->isOpaque())
7153 return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7155 if (SDValue NewSel = foldBinOpIntoSelect(N))
7156 return NewSel;
7158 // if (srl x, c) is known to be zero, return 0
7159 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7160 APInt::getAllOnesValue(OpSizeInBits)))
7161 return DAG.getConstant(0, SDLoc(N), VT);
7163 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7164 if (N0.getOpcode() == ISD::SRL) {
7165 auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7166 ConstantSDNode *RHS) {
7167 APInt c1 = LHS->getAPIntValue();
7168 APInt c2 = RHS->getAPIntValue();
7169 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7170 return (c1 + c2).uge(OpSizeInBits);
7172 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7173 return DAG.getConstant(0, SDLoc(N), VT);
7175 auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7176 ConstantSDNode *RHS) {
7177 APInt c1 = LHS->getAPIntValue();
7178 APInt c2 = RHS->getAPIntValue();
7179 zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7180 return (c1 + c2).ult(OpSizeInBits);
7182 if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7183 SDLoc DL(N);
7184 EVT ShiftVT = N1.getValueType();
7185 SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7186 return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7190 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7191 if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7192 N0.getOperand(0).getOpcode() == ISD::SRL) {
7193 if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7194 uint64_t c1 = N001C->getZExtValue();
7195 uint64_t c2 = N1C->getZExtValue();
7196 EVT InnerShiftVT = N0.getOperand(0).getValueType();
7197 EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7198 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7199 // This is only valid if OpSizeInBits + c1 equals the size of the inner shift.
7200 if (c1 + OpSizeInBits == InnerShiftSize) {
7201 SDLoc DL(N0);
7202 if (c1 + c2 >= InnerShiftSize)
7203 return DAG.getConstant(0, DL, VT);
7204 return DAG.getNode(ISD::TRUNCATE, DL, VT,
7205 DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7206 N0.getOperand(0).getOperand(0),
7207 DAG.getConstant(c1 + c2, DL,
7208 ShiftCountVT)));
7213 // fold (srl (shl x, c), c) -> (and x, cst2)
7214 // TODO - (srl (shl x, c1), c2).
7215 if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7216 isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7217 SDLoc DL(N);
7218 SDValue Mask =
7219 DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7220 AddToWorklist(Mask.getNode());
7221 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7224 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7225 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7226 // Shifting in all undef bits?
7227 EVT SmallVT = N0.getOperand(0).getValueType();
7228 unsigned BitSize = SmallVT.getScalarSizeInBits();
7229 if (N1C->getZExtValue() >= BitSize)
7230 return DAG.getUNDEF(VT);
7232 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7233 uint64_t ShiftAmt = N1C->getZExtValue();
7234 SDLoc DL0(N0);
7235 SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7236 N0.getOperand(0),
7237 DAG.getConstant(ShiftAmt, DL0,
7238 getShiftAmountTy(SmallVT)));
7239 AddToWorklist(SmallShift.getNode());
7240 APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7241 SDLoc DL(N);
7242 return DAG.getNode(ISD::AND, DL, VT,
7243 DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7244 DAG.getConstant(Mask, DL, VT));
7248 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
7249 // bit, which is unmodified by sra.
7250 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
7251 if (N0.getOpcode() == ISD::SRA)
7252 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7255 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit).
7256 if (N1C && N0.getOpcode() == ISD::CTLZ &&
7257 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7258 KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7260 // If any of the input bits are KnownOne, then the input couldn't be all
7261 // zeros, thus the result of the srl will always be zero.
7262 if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7264 // If all of the bits input to the ctlz node are known to be zero, then
7265 // the result of the ctlz is the bitwidth and the result of the shift is one.
7266 APInt UnknownBits = ~Known.Zero;
7267 if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7269 // Otherwise, check to see if there is exactly one bit input to the ctlz.
7270 if (UnknownBits.isPowerOf2()) {
7271 // Okay, we know that only the single bit specified by UnknownBits
7272 // could be set on input to the CTLZ node. If this bit is set, the SRL
7273 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7274 // to an SRL/XOR pair, which is likely to simplify more.
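// A hypothetical i32 example: if only bit 4 of x can be set, then
// (srl (ctlz x), 5) yields 1 when x == 0 (ctlz == 32) and 0 when
// x == 0x10 (ctlz == 27), which is exactly (xor (srl x, 4), 1).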
7275 unsigned ShAmt = UnknownBits.countTrailingZeros();
7276 SDValue Op = N0.getOperand(0);
7278 if (ShAmt) {
7279 SDLoc DL(N0);
7280 Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7281 DAG.getConstant(ShAmt, DL,
7282 getShiftAmountTy(Op.getValueType())));
7283 AddToWorklist(Op.getNode());
7286 SDLoc DL(N);
7287 return DAG.getNode(ISD::XOR, DL, VT,
7288 Op, DAG.getConstant(1, DL, VT));
7292 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7293 if (N1.getOpcode() == ISD::TRUNCATE &&
7294 N1.getOperand(0).getOpcode() == ISD::AND) {
7295 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7296 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7299 // fold operands of srl based on knowledge that the low bits are not
7300 // demanded.
7301 if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7302 return SDValue(N, 0);
7304 if (N1C && !N1C->isOpaque())
7305 if (SDValue NewSRL = visitShiftByConstant(N, N1C))
7306 return NewSRL;
7308 // Attempt to convert a srl of a load into a narrower zero-extending load.
7309 if (SDValue NarrowLoad = ReduceLoadWidth(N))
7310 return NarrowLoad;
7312 // Here is a common situation. We want to optimize:
7314 // %a = ...
7315 // %b = and i32 %a, 2
7316 // %c = srl i32 %b, 1
7317 // brcond i32 %c ...
7319 // into
7321 // %a = ...
7322 // %b = and %a, 2
7323 // %c = setcc eq %b, 0
7324 // brcond %c ...
7326 // However, after the source operand of the SRL is optimized into AND, the SRL
7327 // itself may not be optimized further. Look for it and add the BRCOND to
7328 // the worklist.
7329 if (N->hasOneUse()) {
7330 SDNode *Use = *N->use_begin();
7331 if (Use->getOpcode() == ISD::BRCOND)
7332 AddToWorklist(Use);
7333 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7334 // Also look past the truncate.
7335 Use = *Use->use_begin();
7336 if (Use->getOpcode() == ISD::BRCOND)
7337 AddToWorklist(Use);
7341 return SDValue();
7344 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7345 EVT VT = N->getValueType(0);
7346 SDValue N0 = N->getOperand(0);
7347 SDValue N1 = N->getOperand(1);
7348 SDValue N2 = N->getOperand(2);
7349 bool IsFSHL = N->getOpcode() == ISD::FSHL;
7350 unsigned BitWidth = VT.getScalarSizeInBits();
7352 // fold (fshl N0, N1, 0) -> N0
7353 // fold (fshr N0, N1, 0) -> N1
7354 if (isPowerOf2_32(BitWidth))
7355 if (DAG.MaskedValueIsZero(
7356 N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7357 return IsFSHL ? N0 : N1;
7359 auto IsUndefOrZero = [](SDValue V) {
7360 return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7363 if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7364 EVT ShAmtTy = N2.getValueType();
7366 // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7367 if (Cst->getAPIntValue().uge(BitWidth)) {
7368 uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7369 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7370 DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7373 unsigned ShAmt = Cst->getZExtValue();
7374 if (ShAmt == 0)
7375 return IsFSHL ? N0 : N1;
7377 // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7378 // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7379 // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7380 // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
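// A quick sanity check with hypothetical i8 operands: fshl(x, 0, 3) ==
// (x << 3) | (0 >> 5) == shl(x, 3), and fshr(0, y, 3) ==
// (0 << 5) | (y >> 3) == srl(y, 3).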
7381 if (IsUndefOrZero(N0))
7382 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7383 DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7384 SDLoc(N), ShAmtTy));
7385 if (IsUndefOrZero(N1))
7386 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7387 DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7388 SDLoc(N), ShAmtTy));
7391 // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7392 // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7393 // iff we know the shift amount is in range.
7394 // TODO: when is it worth doing SUB(BW, N2) as well?
7395 if (isPowerOf2_32(BitWidth)) {
7396 APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7397 if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7398 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7399 if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7400 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7403 // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7404 // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7405 // TODO: Investigate flipping this rotate if only one is legal; if the funnel
7406 // shift is legal as well, we might be better off avoiding the non-constant (BW - N2).
7407 unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
7408 if (N0 == N1 && hasOperation(RotOpc, VT))
7409 return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
7411 // Simplify, based on bits shifted out of N0/N1.
7412 if (SimplifyDemandedBits(SDValue(N, 0)))
7413 return SDValue(N, 0);
7415 return SDValue();
7418 SDValue DAGCombiner::visitABS(SDNode *N) {
7419 SDValue N0 = N->getOperand(0);
7420 EVT VT = N->getValueType(0);
7422 // fold (abs c1) -> c2
7423 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7424 return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
7425 // fold (abs (abs x)) -> (abs x)
7426 if (N0.getOpcode() == ISD::ABS)
7427 return N0;
7428 // fold (abs x) -> x iff x is known non-negative
7429 if (DAG.SignBitIsZero(N0))
7430 return N0;
7431 return SDValue();
7434 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
7435 SDValue N0 = N->getOperand(0);
7436 EVT VT = N->getValueType(0);
7438 // fold (bswap c1) -> c2
7439 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7440 return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
7441 // fold (bswap (bswap x)) -> x
7442 if (N0.getOpcode() == ISD::BSWAP)
7443 return N0->getOperand(0);
7444 return SDValue();
7447 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
7448 SDValue N0 = N->getOperand(0);
7449 EVT VT = N->getValueType(0);
7451 // fold (bitreverse c1) -> c2
7452 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7453 return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
7454 // fold (bitreverse (bitreverse x)) -> x
7455 if (N0.getOpcode() == ISD::BITREVERSE)
7456 return N0.getOperand(0);
7457 return SDValue();
7460 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
7461 SDValue N0 = N->getOperand(0);
7462 EVT VT = N->getValueType(0);
7464 // fold (ctlz c1) -> c2
7465 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7466 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
7468 // If the value is known never to be zero, switch to the undef version.
7469 if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
7470 if (DAG.isKnownNeverZero(N0))
7471 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7474 return SDValue();
7477 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
7478 SDValue N0 = N->getOperand(0);
7479 EVT VT = N->getValueType(0);
7481 // fold (ctlz_zero_undef c1) -> c2
7482 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7483 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7484 return SDValue();
7487 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
7488 SDValue N0 = N->getOperand(0);
7489 EVT VT = N->getValueType(0);
7491 // fold (cttz c1) -> c2
7492 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7493 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
7495 // If the value is known never to be zero, switch to the undef version.
7496 if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
7497 if (DAG.isKnownNeverZero(N0))
7498 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7501 return SDValue();
7504 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
7505 SDValue N0 = N->getOperand(0);
7506 EVT VT = N->getValueType(0);
7508 // fold (cttz_zero_undef c1) -> c2
7509 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7510 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
7511 return SDValue();
7514 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
7515 SDValue N0 = N->getOperand(0);
7516 EVT VT = N->getValueType(0);
7518 // fold (ctpop c1) -> c2
7519 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
7520 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
7521 return SDValue();
7524 // FIXME: This should be checking for no signed zeros on individual operands,
7525 // as well as no NaNs.
7526 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
7527 SDValue RHS,
7528 const TargetLowering &TLI) {
7529 const TargetOptions &Options = DAG.getTarget().Options;
7530 EVT VT = LHS.getValueType();
7532 return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
7533 TLI.isProfitableToCombineMinNumMaxNum(VT) &&
7534 DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
7537 /// Generate Min/Max node
7538 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
7539 SDValue RHS, SDValue True, SDValue False,
7540 ISD::CondCode CC, const TargetLowering &TLI,
7541 SelectionDAG &DAG) {
7542 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
7543 return SDValue();
7545 EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
7546 switch (CC) {
7547 case ISD::SETOLT:
7548 case ISD::SETOLE:
7549 case ISD::SETLT:
7550 case ISD::SETLE:
7551 case ISD::SETULT:
7552 case ISD::SETULE: {
7553 // Since the operands are already known never to be NaN here, either fminnum
7554 // or fminnum_ieee is OK. Try the IEEE version first, since fminnum is
7555 // expanded in terms of it.
7556 unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
7557 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7558 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7560 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
7561 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7562 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7563 return SDValue();
7565 case ISD::SETOGT:
7566 case ISD::SETOGE:
7567 case ISD::SETGT:
7568 case ISD::SETGE:
7569 case ISD::SETUGT:
7570 case ISD::SETUGE: {
7571 unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
7572 if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
7573 return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
7575 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
7576 if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
7577 return DAG.getNode(Opcode, DL, VT, LHS, RHS);
7578 return SDValue();
7580 default:
7581 return SDValue();
7585 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
7586 SDValue Cond = N->getOperand(0);
7587 SDValue N1 = N->getOperand(1);
7588 SDValue N2 = N->getOperand(2);
7589 EVT VT = N->getValueType(0);
7590 EVT CondVT = Cond.getValueType();
7591 SDLoc DL(N);
7593 if (!VT.isInteger())
7594 return SDValue();
7596 auto *C1 = dyn_cast<ConstantSDNode>(N1);
7597 auto *C2 = dyn_cast<ConstantSDNode>(N2);
7598 if (!C1 || !C2)
7599 return SDValue();
7601 // Only do this before legalization to avoid conflicting with target-specific
7602 // transforms in the other direction (create a select from a zext/sext). There
7603 // is also a target-independent combine here in DAGCombiner in the other
7604 // direction for (select Cond, -1, 0) when the condition is not i1.
7605 if (CondVT == MVT::i1 && !LegalOperations) {
7606 if (C1->isNullValue() && C2->isOne()) {
7607 // select Cond, 0, 1 --> zext (!Cond)
7608 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7609 if (VT != MVT::i1)
7610 NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
7611 return NotCond;
7613 if (C1->isNullValue() && C2->isAllOnesValue()) {
7614 // select Cond, 0, -1 --> sext (!Cond)
7615 SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
7616 if (VT != MVT::i1)
7617 NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
7618 return NotCond;
7620 if (C1->isOne() && C2->isNullValue()) {
7621 // select Cond, 1, 0 --> zext (Cond)
7622 if (VT != MVT::i1)
7623 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7624 return Cond;
7626 if (C1->isAllOnesValue() && C2->isNullValue()) {
7627 // select Cond, -1, 0 --> sext (Cond)
7628 if (VT != MVT::i1)
7629 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7630 return Cond;
7633 // For any constants that differ by 1, we can transform the select into an
7634 // extend and add. Use a target hook because some targets may prefer to
7635 // transform in the other direction.
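// For example (hypothetical constants): select(Cond, 5, 4) becomes
// add(zext(Cond), 4): a true condition contributes 1 and yields 5, while
// a false condition contributes 0 and yields 4.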
7636 if (TLI.convertSelectOfConstantsToMath(VT)) {
7637 if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
7638 // select Cond, C1, C1-1 --> add (zext Cond), C1-1
7639 if (VT != MVT::i1)
7640 Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
7641 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7643 if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
7644 // select Cond, C1, C1+1 --> add (sext Cond), C1+1
7645 if (VT != MVT::i1)
7646 Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
7647 return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
7651 return SDValue();
7654 // fold (select Cond, 0, 1) -> (xor Cond, 1)
7655 // We can't do this reliably if integer-based booleans have different contents
7656 // from floating-point-based booleans. This is because we can't tell whether we
7657 // have an integer-based boolean or a floating-point-based boolean unless we
7658 // can find the SETCC that produced it and inspect its operands. This is
7659 // fairly easy if Cond is the SETCC node, but it can potentially be
7660 // undiscoverable (or not reasonably discoverable). For example, it could be
7661 // in another basic block or it could require searching a complicated
7662 // expression.
7663 if (CondVT.isInteger() &&
7664 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
7665 TargetLowering::ZeroOrOneBooleanContent &&
7666 TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
7667 TargetLowering::ZeroOrOneBooleanContent &&
7668 C1->isNullValue() && C2->isOne()) {
7669 SDValue NotCond =
7670 DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
7671 if (VT.bitsEq(CondVT))
7672 return NotCond;
7673 return DAG.getZExtOrTrunc(NotCond, DL, VT);
7676 return SDValue();
7679 SDValue DAGCombiner::visitSELECT(SDNode *N) {
7680 SDValue N0 = N->getOperand(0);
7681 SDValue N1 = N->getOperand(1);
7682 SDValue N2 = N->getOperand(2);
7683 EVT VT = N->getValueType(0);
7684 EVT VT0 = N0.getValueType();
7685 SDLoc DL(N);
7687 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
7688 return V;
7690 // fold (select X, X, Y) -> (or X, Y)
// fold (select X, 1, Y) -> (or X, Y)
7692 if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
7693 return DAG.getNode(ISD::OR, DL, VT, N0, N2);
7695 if (SDValue V = foldSelectOfConstants(N))
7696 return V;
7698 // fold (select C, 0, X) -> (and (not C), X)
7699 if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
7700 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7701 AddToWorklist(NOTNode.getNode());
7702 return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
7704 // fold (select C, X, 1) -> (or (not C), X)
7705 if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
7706 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
7707 AddToWorklist(NOTNode.getNode());
7708 return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
7710 // fold (select X, Y, X) -> (and X, Y)
7711 // fold (select X, Y, 0) -> (and X, Y)
7712 if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
7713 return DAG.getNode(ISD::AND, DL, VT, N0, N1);
7715 // If we can fold this based on the true/false value, do so.
7716 if (SimplifySelectOps(N, N1, N2))
7717 return SDValue(N, 0); // Don't revisit N.
7719 if (VT0 == MVT::i1) {
7720 // The code in this block deals with the following 2 equivalences:
7721 // select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
7722 // select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
// The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However, we always transform
// to the right-hand form if the inner select already exists in the DAG, and
// we always transform to the left-hand form if we know that we can further
// optimize the combination of the conditions.
7728 bool normalizeToSequence =
7729 TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
7730 // select (and Cond0, Cond1), X, Y
7731 // -> select Cond0, (select Cond1, X, Y), Y
7732 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
7733 SDValue Cond0 = N0->getOperand(0);
7734 SDValue Cond1 = N0->getOperand(1);
7735 SDValue InnerSelect =
7736 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7737 if (normalizeToSequence || !InnerSelect.use_empty())
7738 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
7739 InnerSelect, N2);
7740 // Cleanup on failure.
7741 if (InnerSelect.use_empty())
7742 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7744 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
7745 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
7746 SDValue Cond0 = N0->getOperand(0);
7747 SDValue Cond1 = N0->getOperand(1);
7748 SDValue InnerSelect =
7749 DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
7750 if (normalizeToSequence || !InnerSelect.use_empty())
7751 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
7752 InnerSelect);
7753 // Cleanup on failure.
7754 if (InnerSelect.use_empty())
7755 recursivelyDeleteUnusedNodes(InnerSelect.getNode());
7758 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
7759 if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
7760 SDValue N1_0 = N1->getOperand(0);
7761 SDValue N1_1 = N1->getOperand(1);
7762 SDValue N1_2 = N1->getOperand(2);
7763 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
7764 // Create the actual and node if we can generate good code for it.
7765 if (!normalizeToSequence) {
7766 SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
7767 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
7769 // Otherwise see if we can optimize the "and" to a better pattern.
7770 if (SDValue Combined = visitANDLike(N0, N1_0, N))
7771 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
7772 N2);
7775 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
7776 if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
7777 SDValue N2_0 = N2->getOperand(0);
7778 SDValue N2_1 = N2->getOperand(1);
7779 SDValue N2_2 = N2->getOperand(2);
7780 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
7781 // Create the actual or node if we can generate good code for it.
7782 if (!normalizeToSequence) {
7783 SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
7784 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
7786 // Otherwise see if we can optimize to a better pattern.
7787 if (SDValue Combined = visitORLike(N0, N2_0, N))
7788 return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
7789 N2_2);
7794 // select (not Cond), N1, N2 -> select Cond, N2, N1
7795 if (SDValue F = extractBooleanFlip(N0, TLI))
7796 return DAG.getSelect(DL, VT, F, N2, N1);
7798 // Fold selects based on a setcc into other things, such as min/max/abs.
7799 if (N0.getOpcode() == ISD::SETCC) {
7800 SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
7801 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
7803 // select (fcmp lt x, y), x, y -> fminnum x, y
7804 // select (fcmp gt x, y), x, y -> fmaxnum x, y
7806 // This is OK if we don't care what happens if either operand is a NaN.
7807 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
7808 if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
7809 CC, TLI, DAG))
7810 return FMinMax;
7812 // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
7813 // This is conservatively limited to pre-legal-operations to give targets
7814 // a chance to reverse the transform if they want to do that. Also, it is
7815 // unlikely that the pattern would be formed late, so it's probably not
7816 // worth going through the other checks.
7817 if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
7818 CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
7819 N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
7820 auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
7821 auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
7822 if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
7823 // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
7824 // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
7826 // The IR equivalent of this transform would have this form:
7827 // %a = add %x, C
7828 // %c = icmp ugt %x, ~C
7829 // %r = select %c, -1, %a
7830 // =>
7831 // %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
7832 // %u0 = extractvalue %u, 0
7833 // %u1 = extractvalue %u, 1
7834 // %r = select %u1, -1, %u0
7835 SDVTList VTs = DAG.getVTList(VT, VT0);
7836 SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
7837 return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
7841 if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
7842 (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
7843 return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
7844 N0.getOperand(2));
7846 return SimplifySelect(DL, N0, N1, N2);
7849 return SDValue();
7852 static
7853 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
7854 SDLoc DL(N);
7855 EVT LoVT, HiVT;
7856 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
7858 // Split the inputs.
7859 SDValue Lo, Hi, LL, LH, RL, RH;
7860 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
7861 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
7863 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
7864 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
7866 return std::make_pair(Lo, Hi);
// This function assumes all the vselect's arguments are CONCAT_VECTORS
// nodes and that the condition is a build_vector of ConstantSDNodes (or
// undefs).
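// Illustrative example (widths invented for exposition):
//   vselect <4 x i1> <1, 1, 0, 0>,
//           (concat_vectors <2 x i32> %a, <2 x i32> %b),
//           (concat_vectors <2 x i32> %c, <2 x i32> %d)
//     --> concat_vectors %a, %d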
7871 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
7872 SDLoc DL(N);
7873 SDValue Cond = N->getOperand(0);
7874 SDValue LHS = N->getOperand(1);
7875 SDValue RHS = N->getOperand(2);
7876 EVT VT = N->getValueType(0);
7877 int NumElems = VT.getVectorNumElements();
7878 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
7879 RHS.getOpcode() == ISD::CONCAT_VECTORS &&
7880 Cond.getOpcode() == ISD::BUILD_VECTOR);
// CONCAT_VECTORS can take an arbitrary number of arguments. We only care
// about binary ones here.
7884 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
7885 return SDValue();
// We're sure we have an even number of elements due to the
// concat_vectors we have as arguments to vselect.
// Scan the first half of the build_vector, skipping UNDEF elements, and
// check that every non-undef element in that half is the same node.
7892 ConstantSDNode *BottomHalf = nullptr;
7893 for (int i = 0; i < NumElems / 2; ++i) {
7894 if (Cond->getOperand(i)->isUndef())
7895 continue;
7897 if (BottomHalf == nullptr)
7898 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7899 else if (Cond->getOperand(i).getNode() != BottomHalf)
7900 return SDValue();
7903 // Do the same for the second half of the BuildVector
7904 ConstantSDNode *TopHalf = nullptr;
7905 for (int i = NumElems / 2; i < NumElems; ++i) {
7906 if (Cond->getOperand(i)->isUndef())
7907 continue;
7909 if (TopHalf == nullptr)
7910 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
7911 else if (Cond->getOperand(i).getNode() != TopHalf)
7912 return SDValue();
7915 assert(TopHalf && BottomHalf &&
7916 "One half of the selector was all UNDEFs and the other was all the "
7917 "same value. This should have been addressed before this function.");
7918 return DAG.getNode(
7919 ISD::CONCAT_VECTORS, DL, VT,
7920 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
7921 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
7924 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
7925 if (Level >= AfterLegalizeTypes)
7926 return SDValue();
7928 MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
7929 SDValue Mask = MSC->getMask();
7930 SDValue Data = MSC->getValue();
7931 SDLoc DL(N);
7933 // If the MSCATTER data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
7935 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7936 // and enables future optimizations (e.g. min/max pattern matching on X86).
7937 if (Mask.getOpcode() != ISD::SETCC)
7938 return SDValue();
7940 // Check if any splitting is required.
7941 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
7942 TargetLowering::TypeSplitVector)
7943 return SDValue();
7944 SDValue MaskLo, MaskHi;
7945 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
7947 EVT LoVT, HiVT;
7948 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
7950 SDValue Chain = MSC->getChain();
7952 EVT MemoryVT = MSC->getMemoryVT();
7953 unsigned Alignment = MSC->getOriginalAlignment();
7955 EVT LoMemVT, HiMemVT;
7956 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
7958 SDValue DataLo, DataHi;
7959 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
7961 SDValue Scale = MSC->getScale();
7962 SDValue BasePtr = MSC->getBasePtr();
7963 SDValue IndexLo, IndexHi;
7964 std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
7966 MachineMemOperand *MMO = DAG.getMachineFunction().
7967 getMachineMemOperand(MSC->getPointerInfo(),
7968 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
7969 Alignment, MSC->getAAInfo(), MSC->getRanges());
7971 SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
7972 SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
7973 DataLo.getValueType(), DL, OpsLo, MMO);
// The order of the scatter operations after the split is well defined: the
// "Hi" part comes after the "Lo" part, so the two operations are chained one
// after the other.
7978 SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
7979 return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
7980 DL, OpsHi, MMO);
7983 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
7984 if (Level >= AfterLegalizeTypes)
7985 return SDValue();
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N); // Cannot fail: N is MSTORE.
7988 SDValue Mask = MST->getMask();
7989 SDValue Data = MST->getValue();
7990 EVT VT = Data.getValueType();
7991 SDLoc DL(N);
7993 // If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
7995 // prevents the type legalizer from unrolling SETCC into scalar comparisons
7996 // and enables future optimizations (e.g. min/max pattern matching on X86).
7997 if (Mask.getOpcode() == ISD::SETCC) {
7998 // Check if any splitting is required.
7999 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8000 TargetLowering::TypeSplitVector)
8001 return SDValue();
8003 SDValue MaskLo, MaskHi, Lo, Hi;
8004 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8006 SDValue Chain = MST->getChain();
8007 SDValue Ptr = MST->getBasePtr();
8009 EVT MemoryVT = MST->getMemoryVT();
8010 unsigned Alignment = MST->getOriginalAlignment();
// If the alignment is equal to the vector size in bytes, use half of it for
// the second half of the split store (e.g. a 32-byte-aligned v8i32 store
// splits into two halves, and the upper half is only 16-byte aligned).
8014 unsigned SecondHalfAlignment =
8015 (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
8017 EVT LoMemVT, HiMemVT;
8018 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8020 SDValue DataLo, DataHi;
8021 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
8023 MachineMemOperand *MMO = DAG.getMachineFunction().
8024 getMachineMemOperand(MST->getPointerInfo(),
8025 MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
8026 Alignment, MST->getAAInfo(), MST->getRanges());
8028 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
8029 MST->isTruncatingStore(),
8030 MST->isCompressingStore());
8032 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8033 MST->isCompressingStore());
8034 unsigned HiOffset = LoMemVT.getStoreSize();
8036 MMO = DAG.getMachineFunction().getMachineMemOperand(
8037 MST->getPointerInfo().getWithOffset(HiOffset),
8038 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
8039 MST->getAAInfo(), MST->getRanges());
8041 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
8042 MST->isTruncatingStore(),
8043 MST->isCompressingStore());
8045 AddToWorklist(Lo.getNode());
8046 AddToWorklist(Hi.getNode());
8048 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
8050 return SDValue();
8053 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8054 if (Level >= AfterLegalizeTypes)
8055 return SDValue();
8057 MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8058 SDValue Mask = MGT->getMask();
8059 SDLoc DL(N);
8061 // If the MGATHER result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
8063 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8064 // and enables future optimizations (e.g. min/max pattern matching on X86).
8066 if (Mask.getOpcode() != ISD::SETCC)
8067 return SDValue();
8069 EVT VT = N->getValueType(0);
8071 // Check if any splitting is required.
8072 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8073 TargetLowering::TypeSplitVector)
8074 return SDValue();
8076 SDValue MaskLo, MaskHi, Lo, Hi;
8077 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8079 SDValue PassThru = MGT->getPassThru();
8080 SDValue PassThruLo, PassThruHi;
8081 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8083 EVT LoVT, HiVT;
8084 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
8086 SDValue Chain = MGT->getChain();
8087 EVT MemoryVT = MGT->getMemoryVT();
8088 unsigned Alignment = MGT->getOriginalAlignment();
8090 EVT LoMemVT, HiMemVT;
8091 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8093 SDValue Scale = MGT->getScale();
8094 SDValue BasePtr = MGT->getBasePtr();
8095 SDValue Index = MGT->getIndex();
8096 SDValue IndexLo, IndexHi;
8097 std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
8099 MachineMemOperand *MMO = DAG.getMachineFunction().
8100 getMachineMemOperand(MGT->getPointerInfo(),
8101 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
8102 Alignment, MGT->getAAInfo(), MGT->getRanges());
8104 SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
8105 Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
8106 MMO);
8108 SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
8109 Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
8110 MMO);
8112 AddToWorklist(Lo.getNode());
8113 AddToWorklist(Hi.getNode());
8115 // Build a factor node to remember that this load is independent of the
8116 // other one.
8117 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8118 Hi.getValue(1));
8120 // Legalized the chain result - switch anything that used the old chain to
8121 // use the new one.
8122 DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
8124 SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8126 SDValue RetOps[] = { GatherRes, Chain };
8127 return DAG.getMergeValues(RetOps, DL);
8130 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8131 if (Level >= AfterLegalizeTypes)
8132 return SDValue();
MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N); // Cannot fail: N is MLOAD.
8135 SDValue Mask = MLD->getMask();
8136 SDLoc DL(N);
8138 // If the MLOAD result requires splitting and the mask is provided by a
// SETCC, then split both nodes and their operands before legalization. This
8140 // prevents the type legalizer from unrolling SETCC into scalar comparisons
8141 // and enables future optimizations (e.g. min/max pattern matching on X86).
8142 if (Mask.getOpcode() == ISD::SETCC) {
8143 EVT VT = N->getValueType(0);
8145 // Check if any splitting is required.
8146 if (TLI.getTypeAction(*DAG.getContext(), VT) !=
8147 TargetLowering::TypeSplitVector)
8148 return SDValue();
8150 SDValue MaskLo, MaskHi, Lo, Hi;
8151 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
8153 SDValue PassThru = MLD->getPassThru();
8154 SDValue PassThruLo, PassThruHi;
8155 std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
8157 EVT LoVT, HiVT;
8158 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
8160 SDValue Chain = MLD->getChain();
8161 SDValue Ptr = MLD->getBasePtr();
8162 EVT MemoryVT = MLD->getMemoryVT();
8163 unsigned Alignment = MLD->getOriginalAlignment();
// If the alignment is equal to the vector size in bytes, use half of it for
// the second half of the split load.
8167 unsigned SecondHalfAlignment =
8168 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
8169 Alignment/2 : Alignment;
8171 EVT LoMemVT, HiMemVT;
8172 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
8174 MachineMemOperand *MMO = DAG.getMachineFunction().
8175 getMachineMemOperand(MLD->getPointerInfo(),
8176 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
8177 Alignment, MLD->getAAInfo(), MLD->getRanges());
8179 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
8180 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8182 Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
8183 MLD->isExpandingLoad());
8184 unsigned HiOffset = LoMemVT.getStoreSize();
8186 MMO = DAG.getMachineFunction().getMachineMemOperand(
8187 MLD->getPointerInfo().getWithOffset(HiOffset),
8188 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
8189 MLD->getAAInfo(), MLD->getRanges());
8191 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
8192 MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
8194 AddToWorklist(Lo.getNode());
8195 AddToWorklist(Hi.getNode());
8197 // Build a factor node to remember that this load is independent of the
8198 // other one.
8199 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
8200 Hi.getValue(1));
8202 // Legalized the chain result - switch anything that used the old chain to
8203 // use the new one.
8204 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
8206 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
8208 SDValue RetOps[] = { LoadRes, Chain };
8209 return DAG.getMergeValues(RetOps, DL);
8211 return SDValue();
8214 /// A vector select of 2 constant vectors can be simplified to math/logic to
8215 /// avoid a variable select instruction and possibly avoid constant loads.
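// Illustrative instance of the add-one case handled below (constants
// invented for exposition):
//   vselect <4 x i1> %c, <4 x i32> <5, 5, 5, 5>, <4 x i32> <4, 4, 4, 4>
//     --> add (zext <4 x i1> %c to <4 x i32>), <4, 4, 4, 4>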
8216 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8217 SDValue Cond = N->getOperand(0);
8218 SDValue N1 = N->getOperand(1);
8219 SDValue N2 = N->getOperand(2);
8220 EVT VT = N->getValueType(0);
8221 if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8222 !TLI.convertSelectOfConstantsToMath(VT) ||
8223 !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8224 !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8225 return SDValue();
8227 // Check if we can use the condition value to increment/decrement a single
8228 // constant value. This simplifies a select to an add and removes a constant
8229 // load/materialization from the general case.
8230 bool AllAddOne = true;
8231 bool AllSubOne = true;
8232 unsigned Elts = VT.getVectorNumElements();
8233 for (unsigned i = 0; i != Elts; ++i) {
8234 SDValue N1Elt = N1.getOperand(i);
8235 SDValue N2Elt = N2.getOperand(i);
8236 if (N1Elt.isUndef() || N2Elt.isUndef())
8237 continue;
8239 const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8240 const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8241 if (C1 != C2 + 1)
8242 AllAddOne = false;
8243 if (C1 != C2 - 1)
8244 AllSubOne = false;
8247 // Further simplifications for the extra-special cases where the constants are
8248 // all 0 or all -1 should be implemented as folds of these patterns.
8249 SDLoc DL(N);
8250 if (AllAddOne || AllSubOne) {
8251 // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8252 // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8253 auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8254 SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8255 return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8258 // The general case for select-of-constants:
8259 // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8260 // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8261 // leave that to a machine-specific pass.
8262 return SDValue();
8265 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8266 SDValue N0 = N->getOperand(0);
8267 SDValue N1 = N->getOperand(1);
8268 SDValue N2 = N->getOperand(2);
8269 EVT VT = N->getValueType(0);
8270 SDLoc DL(N);
8272 if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8273 return V;
8275 // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8276 if (SDValue F = extractBooleanFlip(N0, TLI))
8277 return DAG.getSelect(DL, VT, F, N2, N1);
8279 // Canonicalize integer abs.
8280 // vselect (setg[te] X, 0), X, -X ->
8281 // vselect (setgt X, -1), X, -X ->
8282 // vselect (setl[te] X, 0), -X, X ->
8283 // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
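// Illustrative instance for <4 x i32> (the shift amount is size(X)-1 = 31):
//   vselect (setgt X, <0, 0, 0, 0>), X, (sub <0, 0, 0, 0>, X)
//     --> Y = sra X, 31; xor (add X, Y), Y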
8284 if (N0.getOpcode() == ISD::SETCC) {
8285 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8286 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8287 bool isAbs = false;
8288 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8290 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8291 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8292 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8293 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8294 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8295 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8296 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8298 if (isAbs) {
8299 EVT VT = LHS.getValueType();
8300 if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8301 return DAG.getNode(ISD::ABS, DL, VT, LHS);
8303 SDValue Shift = DAG.getNode(
8304 ISD::SRA, DL, VT, LHS,
8305 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
8306 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8307 AddToWorklist(Shift.getNode());
8308 AddToWorklist(Add.getNode());
8309 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8312 // vselect x, y (fcmp lt x, y) -> fminnum x, y
8313 // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8315 // This is OK if we don't care about what happens if either operand is a
8316 // NaN.
8318 if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
8319 N0.getOperand(1), TLI)) {
8320 if (SDValue FMinMax = combineMinNumMaxNum(
8321 DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
8322 return FMinMax;
8325 // If this select has a condition (setcc) with narrower operands than the
8326 // select, try to widen the compare to match the select width.
8327 // TODO: This should be extended to handle any constant.
8328 // TODO: This could be extended to handle non-loading patterns, but that
8329 // requires thorough testing to avoid regressions.
8330 if (isNullOrNullSplat(RHS)) {
8331 EVT NarrowVT = LHS.getValueType();
8332 EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8333 EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8334 unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8335 unsigned WideWidth = WideVT.getScalarSizeInBits();
8336 bool IsSigned = isSignedIntSetCC(CC);
8337 auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8338 if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8339 SetCCWidth != 1 && SetCCWidth < WideWidth &&
8340 TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8341 TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8342 // Both compare operands can be widened for free. The LHS can use an
8343 // extended load, and the RHS is a constant:
8344 // vselect (ext (setcc load(X), C)), N1, N2 -->
8345 // vselect (setcc extload(X), C'), N1, N2
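// Illustrative types (invented for exposition): a <4 x i16> load compared
// against a zero constant, feeding a select of <4 x i32> values, becomes a
// <4 x i32> extending load and a <4 x i32> setcc.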
8346 auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8347 SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8348 SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8349 EVT WideSetCCVT = getSetCCResultType(WideVT);
8350 SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8351 return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8356 if (SimplifySelectOps(N, N1, N2))
8357 return SDValue(N, 0); // Don't revisit N.
8359 // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8360 if (ISD::isBuildVectorAllOnes(N0.getNode()))
8361 return N1;
8362 // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8363 if (ISD::isBuildVectorAllZeros(N0.getNode()))
8364 return N2;
// The ConvertSelectToConcatVector function assumes both of the above
// checks for (vselect (build_vector all{ones,zeros}) ...) have already been
// made and addressed.
8369 if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8370 N2.getOpcode() == ISD::CONCAT_VECTORS &&
8371 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8372 if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8373 return CV;
8376 if (SDValue V = foldVSelectOfConstants(N))
8377 return V;
8379 return SDValue();
8382 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8383 SDValue N0 = N->getOperand(0);
8384 SDValue N1 = N->getOperand(1);
8385 SDValue N2 = N->getOperand(2);
8386 SDValue N3 = N->getOperand(3);
8387 SDValue N4 = N->getOperand(4);
8388 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8390 // fold select_cc lhs, rhs, x, x, cc -> x
8391 if (N2 == N3)
8392 return N2;
8394 // Determine if the condition we're dealing with is constant
8395 if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8396 CC, SDLoc(N), false)) {
8397 AddToWorklist(SCC.getNode());
8399 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8400 if (!SCCC->isNullValue())
8401 return N2; // cond always true -> true val
8402 else
8403 return N3; // cond always false -> false val
8404 } else if (SCC->isUndef()) {
// When the condition is UNDEF, just return the first operand. This is
// coherent with DAG creation: no setcc node is created in this case.
8407 return N2;
8408 } else if (SCC.getOpcode() == ISD::SETCC) {
8409 // Fold to a simpler select_cc
8410 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
8411 SCC.getOperand(0), SCC.getOperand(1), N2, N3,
8412 SCC.getOperand(2));
8416 // If we can fold this based on the true/false value, do so.
8417 if (SimplifySelectOps(N, N2, N3))
8418 return SDValue(N, 0); // Don't revisit N.
8420 // fold select_cc into other things, such as min/max/abs
8421 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8424 SDValue DAGCombiner::visitSETCC(SDNode *N) {
// setcc is very commonly used as an argument to brcond. This pattern
// also lends itself to numerous combines and, as a result, it is desirable
// to keep the argument to brcond as a setcc for as long as possible.
8428 bool PreferSetCC =
8429 N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8431 SDValue Combined = SimplifySetCC(
8432 N->getValueType(0), N->getOperand(0), N->getOperand(1),
8433 cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8435 if (!Combined)
8436 return SDValue();
8438 // If we prefer to have a setcc, and we don't, we'll try our best to
8439 // recreate one using rebuildSetCC.
8440 if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8441 SDValue NewSetCC = rebuildSetCC(Combined);
8443 // We don't have anything interesting to combine to.
8444 if (NewSetCC.getNode() == N)
8445 return SDValue();
8447 if (NewSetCC)
8448 return NewSetCC;
8451 return Combined;
8454 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8455 SDValue LHS = N->getOperand(0);
8456 SDValue RHS = N->getOperand(1);
8457 SDValue Carry = N->getOperand(2);
8458 SDValue Cond = N->getOperand(3);
8460 // If Carry is false, fold to a regular SETCC.
8461 if (isNullConstant(Carry))
8462 return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8464 return SDValue();
8467 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8468 /// a build_vector of constants.
8469 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8470 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8471 /// Vector extends are not folded if operations are legal; this is to
8472 /// avoid introducing illegal build_vector dag nodes.
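// Illustrative example (types invented for exposition):
//   (zext (build_vector (i8 1), (i8 -1)) to v2i32)
//     --> (build_vector (i32 1), (i32 255))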
8473 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8474 SelectionDAG &DAG, bool LegalTypes) {
8475 unsigned Opcode = N->getOpcode();
8476 SDValue N0 = N->getOperand(0);
8477 EVT VT = N->getValueType(0);
8479 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8480 Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8481 Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8482 && "Expected EXTEND dag node in input!");
8484 // fold (sext c1) -> c1
8485 // fold (zext c1) -> c1
8486 // fold (aext c1) -> c1
8487 if (isa<ConstantSDNode>(N0))
8488 return DAG.getNode(Opcode, SDLoc(N), VT, N0);
8490 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8491 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8492 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8493 EVT SVT = VT.getScalarType();
8494 if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8495 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8496 return SDValue();
8498 // We can fold this node into a build_vector.
8499 unsigned VTBits = SVT.getSizeInBits();
8500 unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8501 SmallVector<SDValue, 8> Elts;
8502 unsigned NumElts = VT.getVectorNumElements();
8503 SDLoc DL(N);
// For zero-extensions, UNDEF elements are still guaranteed to have their
// upper bits set to zero.
8507 bool IsZext =
8508 Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8510 for (unsigned i = 0; i != NumElts; ++i) {
8511 SDValue Op = N0.getOperand(i);
8512 if (Op.isUndef()) {
8513 Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8514 continue;
8517 SDLoc DL(Op);
8518 // Get the constant value and if needed trunc it to the size of the type.
8519 // Nodes like build_vector might have constants wider than the scalar type.
8520 APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8521 if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8522 Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8523 else
8524 Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8527 return DAG.getBuildVector(VT, DL, Elts);
// ExtendUsesToFormExtLoad - Try to extend the uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if the extensions are possible and the
// above-mentioned transformation is profitable.
8534 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8535 unsigned ExtOpc,
8536 SmallVectorImpl<SDNode *> &ExtendNodes,
8537 const TargetLowering &TLI) {
8538 bool HasCopyToRegUses = false;
8539 bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8540 for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8541 UE = N0.getNode()->use_end();
8542 UI != UE; ++UI) {
8543 SDNode *User = *UI;
8544 if (User == N)
8545 continue;
8546 if (UI.getUse().getResNo() != N0.getResNo())
8547 continue;
8548 // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8549 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8550 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8551 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8552 // Sign bits will be lost after a zext.
8553 return false;
8554 bool Add = false;
8555 for (unsigned i = 0; i != 2; ++i) {
8556 SDValue UseOp = User->getOperand(i);
8557 if (UseOp == N0)
8558 continue;
8559 if (!isa<ConstantSDNode>(UseOp))
8560 return false;
8561 Add = true;
8563 if (Add)
8564 ExtendNodes.push_back(User);
8565 continue;
8567 // If truncates aren't free and there are users we can't
8568 // extend, it isn't worthwhile.
8569 if (!isTruncFree)
8570 return false;
8571 // Remember if this value is live-out.
8572 if (User->getOpcode() == ISD::CopyToReg)
8573 HasCopyToRegUses = true;
8576 if (HasCopyToRegUses) {
8577 bool BothLiveOut = false;
8578 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
8579 UI != UE; ++UI) {
8580 SDUse &Use = UI.getUse();
8581 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
8582 BothLiveOut = true;
8583 break;
8586 if (BothLiveOut)
8587 // Both unextended and extended values are live out. There had better be
8588 // a good reason for the transformation.
return !ExtendNodes.empty();
8591 return true;
8594 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
8595 SDValue OrigLoad, SDValue ExtLoad,
8596 ISD::NodeType ExtType) {
8597 // Extend SetCC uses if necessary.
8598 SDLoc DL(ExtLoad);
8599 for (SDNode *SetCC : SetCCs) {
8600 SmallVector<SDValue, 4> Ops;
8602 for (unsigned j = 0; j != 2; ++j) {
8603 SDValue SOp = SetCC->getOperand(j);
8604 if (SOp == OrigLoad)
8605 Ops.push_back(ExtLoad);
8606 else
8607 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
8610 Ops.push_back(SetCC->getOperand(2));
8611 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
8615 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
8616 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
8617 SDValue N0 = N->getOperand(0);
8618 EVT DstVT = N->getValueType(0);
8619 EVT SrcVT = N0.getValueType();
8621 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8622 N->getOpcode() == ISD::ZERO_EXTEND) &&
8623 "Unexpected node type (not an extend)!");
8625 // fold (sext (load x)) to multiple smaller sextloads; same for zext.
8626 // For example, on a target with legal v4i32, but illegal v8i32, turn:
8627 // (v8i32 (sext (v8i16 (load x))))
8628 // into:
8629 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8630 // (v4i32 (sextload (x + 16)))))
8631 // Where uses of the original load, i.e.:
8632 // (v8i16 (load x))
8633 // are replaced with:
8634 // (v8i16 (truncate
8635 // (v8i32 (concat_vectors (v4i32 (sextload x)),
8636 // (v4i32 (sextload (x + 16)))))))
8638 // This combine is only applicable to illegal, but splittable, vectors.
8639 // All legal types, and illegal non-vector types, are handled elsewhere.
8640 // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
8642 if (N0->getOpcode() != ISD::LOAD)
8643 return SDValue();
8645 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8647 if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
8648 !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
8649 !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
8650 return SDValue();
8652 SmallVector<SDNode *, 4> SetCCs;
8653 if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
8654 return SDValue();
8656 ISD::LoadExtType ExtType =
8657 N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8659 // Try to split the vector types to get down to legal types.
8660 EVT SplitSrcVT = SrcVT;
8661 EVT SplitDstVT = DstVT;
8662 while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
8663 SplitSrcVT.getVectorNumElements() > 1) {
8664 SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
8665 SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
8668 if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
8669 return SDValue();
8671 SDLoc DL(N);
8672 const unsigned NumSplits =
8673 DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
8674 const unsigned Stride = SplitSrcVT.getStoreSize();
8675 SmallVector<SDValue, 4> Loads;
8676 SmallVector<SDValue, 4> Chains;
8678 SDValue BasePtr = LN0->getBasePtr();
8679 for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
8680 const unsigned Offset = Idx * Stride;
8681 const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
8683 SDValue SplitLoad = DAG.getExtLoad(
8684 ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
8685 LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
8686 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
8688 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
8689 DAG.getConstant(Stride, DL, BasePtr.getValueType()));
8691 Loads.push_back(SplitLoad.getValue(0));
8692 Chains.push_back(SplitLoad.getValue(1));
8695 SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
8696 SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
8698 // Simplify TF.
8699 AddToWorklist(NewChain.getNode());
8701 CombineTo(N, NewValue);
8703 // Replace uses of the original load (before extension)
8704 // with a truncate of the concatenated sextloaded vectors.
8705 SDValue Trunc =
8706 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
8707 ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
8708 CombineTo(N0.getNode(), Trunc, NewChain);
8709 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8712 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
8713 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
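// Illustrative instance (i8 load zero-extended to i32; constants invented
// for exposition):
//   (zext (and (srl (load x), 1), 0x7F))
//     --> (and (srl (zextload x), 1), 0x7F)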
8714 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
8715 assert(N->getOpcode() == ISD::ZERO_EXTEND);
8716 EVT VT = N->getValueType(0);
8717 EVT OrigVT = N->getOperand(0).getValueType();
8718 if (TLI.isZExtFree(OrigVT, VT))
8719 return SDValue();
8721 // and/or/xor
8722 SDValue N0 = N->getOperand(0);
8723 if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
8724 N0.getOpcode() == ISD::XOR) ||
8725 N0.getOperand(1).getOpcode() != ISD::Constant ||
8726 (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
8727 return SDValue();
8729 // shl/shr
8730 SDValue N1 = N0->getOperand(0);
8731 if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
8732 N1.getOperand(1).getOpcode() != ISD::Constant ||
8733 (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
8734 return SDValue();
8736 // load
8737 if (!isa<LoadSDNode>(N1.getOperand(0)))
8738 return SDValue();
8739 LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
8740 EVT MemVT = Load->getMemoryVT();
8741 if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
8742 Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
8743 return SDValue();
8746 // If the shift op is SHL, the logic op must be AND, otherwise the result
8747 // will be wrong.
8748 if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
8749 return SDValue();
8751 if (!N0.hasOneUse() || !N1.hasOneUse())
8752 return SDValue();
8754 SmallVector<SDNode*, 4> SetCCs;
8755 if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
8756 ISD::ZERO_EXTEND, SetCCs, TLI))
8757 return SDValue();
8759 // Actually do the transformation.
8760 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
8761 Load->getChain(), Load->getBasePtr(),
8762 Load->getMemoryVT(), Load->getMemOperand());
8764 SDLoc DL1(N1);
8765 SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
8766 N1.getOperand(1));
8768 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
8769 Mask = Mask.zext(VT.getSizeInBits());
8770 SDLoc DL0(N0);
8771 SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
8772 DAG.getConstant(Mask, DL0, VT));
8774 ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
8775 CombineTo(N, And);
8776 if (SDValue(Load, 0).hasOneUse()) {
8777 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
8778 } else {
8779 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
8780 Load->getValueType(0), ExtLoad);
8781 CombineTo(Load, Trunc, ExtLoad.getValue(1));
8784 // N0 is dead at this point.
8785 recursivelyDeleteUnusedNodes(N0.getNode());
8787 return SDValue(N,0); // Return N so it doesn't get rechecked!
8790 /// If we're narrowing or widening the result of a vector select and the final
8791 /// size is the same size as a setcc (compare) feeding the select, then try to
8792 /// apply the cast operation to the select's operands because matching vector
8793 /// sizes for a select condition and other operands should be more efficient.
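// Illustrative example (types invented for exposition, on a target where the
// setcc result for <4 x i32> operands is itself <4 x i32>):
//   (trunc (vselect (setcc <4 x i32> %a, %b), <4 x i64> %x, <4 x i64> %y))
//     --> vselect (setcc %a, %b), (trunc %x), (trunc %y)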
8794 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
8795 unsigned CastOpcode = Cast->getOpcode();
8796 assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
8797 CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
8798 CastOpcode == ISD::FP_ROUND) &&
8799 "Unexpected opcode for vector select narrowing/widening");
8801 // We only do this transform before legal ops because the pattern may be
8802 // obfuscated by target-specific operations after legalization. Do not create
8803 // an illegal select op, however, because that may be difficult to lower.
8804 EVT VT = Cast->getValueType(0);
8805 if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
8806 return SDValue();
8808 SDValue VSel = Cast->getOperand(0);
8809 if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
8810 VSel.getOperand(0).getOpcode() != ISD::SETCC)
8811 return SDValue();
8813 // Does the setcc have the same vector size as the casted select?
8814 SDValue SetCC = VSel.getOperand(0);
8815 EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
8816 if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
8817 return SDValue();
8819 // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
8820 SDValue A = VSel.getOperand(1);
8821 SDValue B = VSel.getOperand(2);
8822 SDValue CastA, CastB;
8823 SDLoc DL(Cast);
8824 if (CastOpcode == ISD::FP_ROUND) {
8825 // FP_ROUND (fptrunc) has an extra flag operand to pass along.
8826 CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
8827 CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
8828 } else {
8829 CastA = DAG.getNode(CastOpcode, DL, VT, A);
8830 CastB = DAG.getNode(CastOpcode, DL, VT, B);
8832 return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
8835 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8836 // fold ([s|z]ext ( extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8837 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
8838 const TargetLowering &TLI, EVT VT,
8839 bool LegalOperations, SDNode *N,
8840 SDValue N0, ISD::LoadExtType ExtLoadType) {
8841 SDNode *N0Node = N0.getNode();
8842 bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
8843 : ISD::isZEXTLoad(N0Node);
8844 if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
8845 !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
8846 return SDValue();
8848 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8849 EVT MemVT = LN0->getMemoryVT();
8850 if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
8851 !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
8852 return SDValue();
8854 SDValue ExtLoad =
8855 DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8856 LN0->getBasePtr(), MemVT, LN0->getMemOperand());
8857 Combiner.CombineTo(N, ExtLoad);
8858 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8859 if (LN0->use_empty())
8860 Combiner.recursivelyDeleteUnusedNodes(LN0);
8861 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8864 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
8865 // Only generate vector extloads when 1) they're legal, and 2) they are
8866 // deemed desirable by the target.
8867 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
8868 const TargetLowering &TLI, EVT VT,
8869 bool LegalOperations, SDNode *N, SDValue N0,
8870 ISD::LoadExtType ExtLoadType,
8871 ISD::NodeType ExtOpc) {
8872 if (!ISD::isNON_EXTLoad(N0.getNode()) ||
8873 !ISD::isUNINDEXEDLoad(N0.getNode()) ||
8874 ((LegalOperations || VT.isVector() ||
8875 cast<LoadSDNode>(N0)->isVolatile()) &&
8876 !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
8877 return {};
8879 bool DoXform = true;
8880 SmallVector<SDNode *, 4> SetCCs;
8881 if (!N0.hasOneUse())
8882 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
8883 if (VT.isVector())
8884 DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
8885 if (!DoXform)
8886 return {};
8888 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
8889 SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
8890 LN0->getBasePtr(), N0.getValueType(),
8891 LN0->getMemOperand());
8892 Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
8893 // If the load value is used only by N, replace it via CombineTo N.
8894 bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
8895 Combiner.CombineTo(N, ExtLoad);
8896 if (NoReplaceTrunc) {
8897 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
8898 Combiner.recursivelyDeleteUnusedNodes(LN0);
8899 } else {
8900 SDValue Trunc =
8901 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
8902 Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
8904 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8907 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
8908 bool LegalOperations) {
8909 assert((N->getOpcode() == ISD::SIGN_EXTEND ||
8910 N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
8912 SDValue SetCC = N->getOperand(0);
8913 if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
8914 !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
8915 return SDValue();
8917 SDValue X = SetCC.getOperand(0);
8918 SDValue Ones = SetCC.getOperand(1);
8919 ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
8920 EVT VT = N->getValueType(0);
8921 EVT XVT = X.getValueType();
8922 // setge X, C is canonicalized to setgt, so we do not need to match that
8923 // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
8924 // not require the 'not' op.
8925 if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
8926 // Invert and smear/shift the sign bit:
8927 // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
8928 // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
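// Concrete instance (illustrative, for i32):
//   sext i1 (setgt i32 X, -1) --> sra (xor X, -1), 31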
8929 SDLoc DL(N);
8930 SDValue NotX = DAG.getNOT(DL, X, VT);
8931 SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
8932 auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
8933 return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
8935 return SDValue();
8938 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
8939 SDValue N0 = N->getOperand(0);
8940 EVT VT = N->getValueType(0);
8941 SDLoc DL(N);
8943 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
8944 return Res;
8946 // fold (sext (sext x)) -> (sext x)
8947 // fold (sext (aext x)) -> (sext x)
8948 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
8949 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
8951 if (N0.getOpcode() == ISD::TRUNCATE) {
8952 // fold (sext (truncate (load x))) -> (sext (smaller load x))
8953 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
8954 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
8955 SDNode *oye = N0.getOperand(0).getNode();
8956 if (NarrowLoad.getNode() != N0.getNode()) {
8957 CombineTo(N0.getNode(), NarrowLoad);
8958 // CombineTo deleted the truncate, if needed, but not what's under it.
8959 AddToWorklist(oye);
8961 return SDValue(N, 0); // Return N so it doesn't get rechecked!
8964 // See if the value being truncated is already sign extended. If so, just
8965 // eliminate the trunc/sext pair.
8966 SDValue Op = N0.getOperand(0);
8967 unsigned OpBits = Op.getScalarValueSizeInBits();
8968 unsigned MidBits = N0.getScalarValueSizeInBits();
8969 unsigned DestBits = VT.getScalarSizeInBits();
8970 unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
8972 if (OpBits == DestBits) {
// Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign
// bits, it is already sign-extended and can be returned directly.
8975 if (NumSignBits > DestBits-MidBits)
8976 return Op;
8977 } else if (OpBits < DestBits) {
8978 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign
8979 // bits, just sext from i32.
8980 if (NumSignBits > OpBits-MidBits)
8981 return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
8982 } else {
8983 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign
8984 // bits, just truncate to i32.
8985 if (NumSignBits > OpBits-MidBits)
8986 return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
8989 // fold (sext (truncate x)) -> (sextinreg x).
8990 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
8991 N0.getValueType())) {
8992 if (OpBits < DestBits)
8993 Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
8994 else if (OpBits > DestBits)
8995 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
8996 return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
8997 DAG.getValueType(N0.getValueType()));
9001 // Try to simplify (sext (load x)).
9002 if (SDValue foldedExt =
9003 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9004 ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9005 return foldedExt;
9007 // fold (sext (load x)) to multiple smaller sextloads.
9008 // Only on illegal but splittable vectors.
9009 if (SDValue ExtLoad = CombineExtLoad(N))
9010 return ExtLoad;
9012 // Try to simplify (sext (sextload x)).
9013 if (SDValue foldedExt = tryToFoldExtOfExtload(
9014 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9015 return foldedExt;
9017 // fold (sext (and/or/xor (load x), cst)) ->
9018 // (and/or/xor (sextload x), (sext cst))
9019 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9020 N0.getOpcode() == ISD::XOR) &&
9021 isa<LoadSDNode>(N0.getOperand(0)) &&
9022 N0.getOperand(1).getOpcode() == ISD::Constant &&
9023 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9024 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9025 EVT MemVT = LN00->getMemoryVT();
9026 if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9027 LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9028 SmallVector<SDNode*, 4> SetCCs;
9029 bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9030 ISD::SIGN_EXTEND, SetCCs, TLI);
9031 if (DoXform) {
9032 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9033 LN00->getChain(), LN00->getBasePtr(),
9034 LN00->getMemoryVT(),
9035 LN00->getMemOperand());
9036 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9037 Mask = Mask.sext(VT.getSizeInBits());
9038 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9039 ExtLoad, DAG.getConstant(Mask, DL, VT));
9040 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9041 bool NoReplaceTruncAnd = !N0.hasOneUse();
9042 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9043 CombineTo(N, And);
9044 // If N0 has multiple uses, change other uses as well.
9045 if (NoReplaceTruncAnd) {
9046 SDValue TruncAnd =
9047 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9048 CombineTo(N0.getNode(), TruncAnd);
9050 if (NoReplaceTrunc) {
9051 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9052 } else {
9053 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9054 LN00->getValueType(0), ExtLoad);
9055 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9057 return SDValue(N,0); // Return N so it doesn't get rechecked!
9062 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9063 return V;
9065 if (N0.getOpcode() == ISD::SETCC) {
9066 SDValue N00 = N0.getOperand(0);
9067 SDValue N01 = N0.getOperand(1);
9068 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9069 EVT N00VT = N0.getOperand(0).getValueType();
9071 // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9072 // Only do this before legalize for now.
9073 if (VT.isVector() && !LegalOperations &&
9074 TLI.getBooleanContents(N00VT) ==
9075 TargetLowering::ZeroOrNegativeOneBooleanContent) {
9076 // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9077 // of the same size as the compared operands. Only optimize sext(setcc())
9078 // if this is the case.
9079 EVT SVT = getSetCCResultType(N00VT);
9081 // If we already have the desired type, don't change it.
9082 if (SVT != N0.getValueType()) {
9083 // We know that the # elements of the results is the same as the
9084 // # elements of the compare (and the # elements of the compare result
9085 // for that matter). Check to see that they are the same size. If so,
9086 // we know that the element size of the sext'd result matches the
9087 // element size of the compare operands.
9088 if (VT.getSizeInBits() == SVT.getSizeInBits())
9089 return DAG.getSetCC(DL, VT, N00, N01, CC);
9091 // If the desired elements are smaller or larger than the source
9092 // elements, we can use a matching integer vector type and then
9093 // truncate/sign extend.
9094 EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9095 if (SVT == MatchingVecType) {
9096 SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9097 return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9102 // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9103 // Here, T can be 1 or -1, depending on the type of the setcc and
9104 // getBooleanContents().
9105 unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9107 // To determine the "true" side of the select, we need to know the high bit
9108 // of the value returned by the setcc if it evaluates to true.
9109 // If the type of the setcc is i1, then the true case of the select is just
9110 // sext(i1 1), that is, -1.
9111 // If the type of the setcc is larger (say, i8) then the value of the high
9112 // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9113 // of the appropriate width.
9114 SDValue ExtTrueVal = (SetCCWidth == 1)
9115 ? DAG.getAllOnesConstant(DL, VT)
9116 : DAG.getBoolConstant(true, DL, VT, N00VT);
9117 SDValue Zero = DAG.getConstant(0, DL, VT);
9118 if (SDValue SCC =
9119 SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9120 return SCC;
9122 if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9123 EVT SetCCVT = getSetCCResultType(N00VT);
9124 // Don't do this transform for i1 because there's a select transform
9125 // that would reverse it.
9126 // TODO: We should not do this transform at all without a target hook
9127 // because a sext is likely cheaper than a select?
9128 if (SetCCVT.getScalarSizeInBits() != 1 &&
9129 (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9130 SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9131 return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9136 // fold (sext x) -> (zext x) if the sign bit is known zero.
9137 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9138 DAG.SignBitIsZero(N0))
9139 return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9141 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9142 return NewVSel;
9144 // Eliminate this sign extend by doing a negation in the destination type:
9145 // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9146 if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9147 isNullOrNullSplat(N0.getOperand(0)) &&
9148 N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9149 TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9150 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9151 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9153 // Eliminate this sign extend by doing a decrement in the destination type:
9154 // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9155 if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9156 isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9157 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9158 TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9159 SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9160 return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9163 return SDValue();
// isTruncateOf - If N is a truncate of some other value, return true,
// recording the value being truncated in Op and which of Op's bits are
// zero/one in Known. This function computes KnownBits to avoid a duplicated
// call to computeKnownBits in the caller.
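// For example, (setcc X, 0, setne) where X is known to be either 0 or 1
// behaves like a truncate of X to i1, which is why the SETNE form below is
// also accepted.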
9170 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9171 KnownBits &Known) {
9172 if (N->getOpcode() == ISD::TRUNCATE) {
9173 Op = N->getOperand(0);
9174 Known = DAG.computeKnownBits(Op);
9175 return true;
9178 if (N.getOpcode() != ISD::SETCC ||
9179 N.getValueType().getScalarType() != MVT::i1 ||
9180 cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9181 return false;
9183 SDValue Op0 = N->getOperand(0);
9184 SDValue Op1 = N->getOperand(1);
9185 assert(Op0.getValueType() == Op1.getValueType());
9187 if (isNullOrNullSplat(Op0))
9188 Op = Op1;
9189 else if (isNullOrNullSplat(Op1))
9190 Op = Op0;
9191 else
9192 return false;
9194 Known = DAG.computeKnownBits(Op);
9196 return (Known.Zero | 1).isAllOnesValue();
9199 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9200 SDValue N0 = N->getOperand(0);
9201 EVT VT = N->getValueType(0);
9203 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9204 return Res;
9206 // fold (zext (zext x)) -> (zext x)
9207 // fold (zext (aext x)) -> (zext x)
9208 if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9209 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9210 N0.getOperand(0));
9212 // fold (zext (truncate x)) -> (zext x) or
9213 // (zext (truncate x)) -> (truncate x)
9214 // This is valid when the truncated bits of x are already zero.
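// For instance, if the top 24 bits of an i32 value X are known zero, then
// (zext (trunc X to i8) to i64) can be rewritten as (zext X to i64)
// without changing any bits.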
9215 SDValue Op;
9216 KnownBits Known;
9217 if (isTruncateOf(DAG, N0, Op, Known)) {
9218 APInt TruncatedBits =
9219 (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9220 APInt(Op.getScalarValueSizeInBits(), 0) :
9221 APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9222 N0.getScalarValueSizeInBits(),
9223 std::min(Op.getScalarValueSizeInBits(),
9224 VT.getScalarSizeInBits()));
9225 if (TruncatedBits.isSubsetOf(Known.Zero))
9226 return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9229 // fold (zext (truncate x)) -> (and x, mask)
9230 if (N0.getOpcode() == ISD::TRUNCATE) {
9231 // fold (zext (truncate (load x))) -> (zext (smaller load x))
9232 // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9233 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9234 SDNode *oye = N0.getOperand(0).getNode();
9235 if (NarrowLoad.getNode() != N0.getNode()) {
9236 CombineTo(N0.getNode(), NarrowLoad);
9237 // CombineTo deleted the truncate, if needed, but not what's under it.
9238 AddToWorklist(oye);
9240 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9243 EVT SrcVT = N0.getOperand(0).getValueType();
9244 EVT MinVT = N0.getValueType();
9246 // Try to mask before the extension to avoid having to generate a larger mask,
9247 // possibly over several sub-vectors.
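// For example, for (zext (trunc v4i32 X to v4i8) to v4i64), masking first
// as (and X, splat(255)) in v4i32 and then zero-extending avoids
// materializing the mask in the wider v4i64 type.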
9248 if (SrcVT.bitsLT(VT) && VT.isVector()) {
9249 if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9250 TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9251 SDValue Op = N0.getOperand(0);
9252 Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9253 AddToWorklist(Op.getNode());
9254 SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9255 // Transfer the debug info; the new node is equivalent to N0.
9256 DAG.transferDbgValues(N0, ZExtOrTrunc);
9257 return ZExtOrTrunc;
9261 if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9262 SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9263 AddToWorklist(Op.getNode());
9264 SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9265 // We may safely transfer the debug info describing the truncate node over
9266 // to the equivalent and operation.
9267 DAG.transferDbgValues(N0, And);
9268 return And;
9272 // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9273 // if either of the casts is not free.
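// For example, on a target where the i64 -> i32 truncate is not free:
//   (zext (and (trunc i64 X to i32), 15) to i64) -> (and X, 15)
// The mask constant is zero-extended instead, so no truncate of X remains.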
9274 if (N0.getOpcode() == ISD::AND &&
9275 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9276 N0.getOperand(1).getOpcode() == ISD::Constant &&
9277 (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9278 N0.getValueType()) ||
9279 !TLI.isZExtFree(N0.getValueType(), VT))) {
9280 SDValue X = N0.getOperand(0).getOperand(0);
9281 X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9282 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9283 Mask = Mask.zext(VT.getSizeInBits());
9284 SDLoc DL(N);
9285 return DAG.getNode(ISD::AND, DL, VT,
9286 X, DAG.getConstant(Mask, DL, VT));
9289 // Try to simplify (zext (load x)).
9290 if (SDValue foldedExt =
9291 tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9292 ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9293 return foldedExt;
9295 // fold (zext (load x)) to multiple smaller zextloads.
9296 // Only on illegal but splittable vectors.
9297 if (SDValue ExtLoad = CombineExtLoad(N))
9298 return ExtLoad;
9300 // fold (zext (and/or/xor (load x), cst)) ->
9301 // (and/or/xor (zextload x), (zext cst))
9302 // Unless (and (load x) cst) will match as a zextload already and has
9303 // additional users.
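// Illustrative example:
//   (zext (xor (i8 (load p)), 1) to i32) -> (xor (i32 (zextload p)), 1)
// which lets the load fold into a single zextload.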
9304 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9305 N0.getOpcode() == ISD::XOR) &&
9306 isa<LoadSDNode>(N0.getOperand(0)) &&
9307 N0.getOperand(1).getOpcode() == ISD::Constant &&
9308 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9309 LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9310 EVT MemVT = LN00->getMemoryVT();
9311 if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9312 LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9313 bool DoXform = true;
9314 SmallVector<SDNode*, 4> SetCCs;
9315 if (!N0.hasOneUse()) {
9316 if (N0.getOpcode() == ISD::AND) {
9317 auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9318 EVT LoadResultTy = AndC->getValueType(0);
9319 EVT ExtVT;
9320 if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9321 DoXform = false;
9324 if (DoXform)
9325 DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9326 ISD::ZERO_EXTEND, SetCCs, TLI);
9327 if (DoXform) {
9328 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9329 LN00->getChain(), LN00->getBasePtr(),
9330 LN00->getMemoryVT(),
9331 LN00->getMemOperand());
9332 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9333 Mask = Mask.zext(VT.getSizeInBits());
9334 SDLoc DL(N);
9335 SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9336 ExtLoad, DAG.getConstant(Mask, DL, VT));
9337 ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9338 bool NoReplaceTruncAnd = !N0.hasOneUse();
9339 bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9340 CombineTo(N, And);
9341 // If N0 has multiple uses, change other uses as well.
9342 if (NoReplaceTruncAnd) {
9343 SDValue TruncAnd =
9344 DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9345 CombineTo(N0.getNode(), TruncAnd);
9347 if (NoReplaceTrunc) {
9348 DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9349 } else {
9350 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9351 LN00->getValueType(0), ExtLoad);
9352 CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9354 return SDValue(N,0); // Return N so it doesn't get rechecked!
9359 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9360 // (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9361 if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9362 return ZExtLoad;
9364 // Try to simplify (zext (zextload x)).
9365 if (SDValue foldedExt = tryToFoldExtOfExtload(
9366 DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9367 return foldedExt;
9369 if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9370 return V;
9372 if (N0.getOpcode() == ISD::SETCC) {
9373 // Only do this before legalize for now.
9374 if (!LegalOperations && VT.isVector() &&
9375 N0.getValueType().getVectorElementType() == MVT::i1) {
9376 EVT N00VT = N0.getOperand(0).getValueType();
9377 if (getSetCCResultType(N00VT) == N0.getValueType())
9378 return SDValue();
// We know that the # of elements of the result is the same as the #
// of elements of the compare (and the # of elements of the compare result
// for that matter). Check to see that they are the same size. If so, we
// know that the element size of the zext'd result matches the element
// size of the compare operands.
9385 SDLoc DL(N);
9386 SDValue VecOnes = DAG.getConstant(1, DL, VT);
9387 if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
// zext(setcc) -> (and (vsetcc), (1, 1, ...)) for vectors.
9389 SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9390 N0.getOperand(1), N0.getOperand(2));
9391 return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9394 // If the desired elements are smaller or larger than the source
9395 // elements we can use a matching integer vector type and then
9396 // truncate/sign extend.
9397 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9398 SDValue VsetCC =
9399 DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9400 N0.getOperand(1), N0.getOperand(2));
9401 return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9402 VecOnes);
9405 // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9406 SDLoc DL(N);
9407 if (SDValue SCC = SimplifySelectCC(
9408 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9409 DAG.getConstant(0, DL, VT),
9410 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9411 return SCC;
9414 // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9415 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9416 isa<ConstantSDNode>(N0.getOperand(1)) &&
9417 N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9418 N0.hasOneUse()) {
9419 SDValue ShAmt = N0.getOperand(1);
9420 unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
9421 if (N0.getOpcode() == ISD::SHL) {
9422 SDValue InnerZExt = N0.getOperand(0);
9423 // If the original shl may be shifting out bits, do not perform this
9424 // transformation.
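// For example, in (zext (shl (zext i8 X to i16), 12) to i32) only
// 16 - 8 = 8 known-zero bits are available, so shifting by 12 could
// discard bits of X and the fold must be skipped.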
9425 unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9426 InnerZExt.getOperand(0).getValueSizeInBits();
9427 if (ShAmtVal > KnownZeroBits)
9428 return SDValue();
9431 SDLoc DL(N);
9433 // Ensure that the shift amount is wide enough for the shifted value.
9434 if (VT.getSizeInBits() >= 256)
9435 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9437 return DAG.getNode(N0.getOpcode(), DL, VT,
9438 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9439 ShAmt);
9442 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9443 return NewVSel;
9445 return SDValue();
9448 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9449 SDValue N0 = N->getOperand(0);
9450 EVT VT = N->getValueType(0);
9452 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9453 return Res;
9455 // fold (aext (aext x)) -> (aext x)
9456 // fold (aext (zext x)) -> (zext x)
9457 // fold (aext (sext x)) -> (sext x)
9458 if (N0.getOpcode() == ISD::ANY_EXTEND ||
9459 N0.getOpcode() == ISD::ZERO_EXTEND ||
9460 N0.getOpcode() == ISD::SIGN_EXTEND)
9461 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9463 // fold (aext (truncate (load x))) -> (aext (smaller load x))
// fold (aext (truncate (srl (load x), c))) -> (aext (smaller load (x+c/n)))
9465 if (N0.getOpcode() == ISD::TRUNCATE) {
9466 if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9467 SDNode *oye = N0.getOperand(0).getNode();
9468 if (NarrowLoad.getNode() != N0.getNode()) {
9469 CombineTo(N0.getNode(), NarrowLoad);
9470 // CombineTo deleted the truncate, if needed, but not what's under it.
9471 AddToWorklist(oye);
9473 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9477 // fold (aext (truncate x))
9478 if (N0.getOpcode() == ISD::TRUNCATE)
9479 return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9481 // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9482 // if the trunc is not free.
9483 if (N0.getOpcode() == ISD::AND &&
9484 N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9485 N0.getOperand(1).getOpcode() == ISD::Constant &&
9486 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9487 N0.getValueType())) {
9488 SDLoc DL(N);
9489 SDValue X = N0.getOperand(0).getOperand(0);
9490 X = DAG.getAnyExtOrTrunc(X, DL, VT);
9491 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9492 Mask = Mask.zext(VT.getSizeInBits());
9493 return DAG.getNode(ISD::AND, DL, VT,
9494 X, DAG.getConstant(Mask, DL, VT));
9497 // fold (aext (load x)) -> (aext (truncate (extload x)))
9498 // None of the supported targets knows how to perform load and any_ext
9499 // on vectors in one instruction. We only perform this transformation on
9500 // scalars.
9501 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9502 ISD::isUNINDEXEDLoad(N0.getNode()) &&
9503 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9504 bool DoXform = true;
9505 SmallVector<SDNode*, 4> SetCCs;
9506 if (!N0.hasOneUse())
9507 DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9508 TLI);
9509 if (DoXform) {
9510 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9511 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9512 LN0->getChain(),
9513 LN0->getBasePtr(), N0.getValueType(),
9514 LN0->getMemOperand());
9515 ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9516 // If the load value is used only by N, replace it via CombineTo N.
9517 bool NoReplaceTrunc = N0.hasOneUse();
9518 CombineTo(N, ExtLoad);
9519 if (NoReplaceTrunc) {
9520 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9521 recursivelyDeleteUnusedNodes(LN0);
9522 } else {
9523 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
9524 N0.getValueType(), ExtLoad);
9525 CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9527 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9531 // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
9532 // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
9533 // fold (aext ( extload x)) -> (aext (truncate (extload x)))
9534 if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
9535 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
9536 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9537 ISD::LoadExtType ExtType = LN0->getExtensionType();
9538 EVT MemVT = LN0->getMemoryVT();
9539 if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
9540 SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
9541 VT, LN0->getChain(), LN0->getBasePtr(),
9542 MemVT, LN0->getMemOperand());
9543 CombineTo(N, ExtLoad);
9544 DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9545 recursivelyDeleteUnusedNodes(LN0);
9546 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9550 if (N0.getOpcode() == ISD::SETCC) {
9551 // For vectors:
9552 // aext(setcc) -> vsetcc
9553 // aext(setcc) -> truncate(vsetcc)
9554 // aext(setcc) -> aext(vsetcc)
9555 // Only do this before legalize for now.
9556 if (VT.isVector() && !LegalOperations) {
9557 EVT N00VT = N0.getOperand(0).getValueType();
9558 if (getSetCCResultType(N00VT) == N0.getValueType())
9559 return SDValue();
// We know that the # of elements of the result is the same as the
// # of elements of the compare (and the # of elements of the compare
// result, for that matter). Check to see that they are the same size.
// If so, we know that the element size of the extended result matches
// the element size of the compare operands.
9566 if (VT.getSizeInBits() == N00VT.getSizeInBits())
9567 return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
9568 N0.getOperand(1),
9569 cast<CondCodeSDNode>(N0.getOperand(2))->get());
9571 // If the desired elements are smaller or larger than the source
9572 // elements we can use a matching integer vector type and then
9573 // truncate/any extend
9574 EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9575 SDValue VsetCC =
9576 DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
9577 N0.getOperand(1),
9578 cast<CondCodeSDNode>(N0.getOperand(2))->get());
9579 return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
9582 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9583 SDLoc DL(N);
9584 if (SDValue SCC = SimplifySelectCC(
9585 DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9586 DAG.getConstant(0, DL, VT),
9587 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9588 return SCC;
9591 return SDValue();
9594 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
9595 unsigned Opcode = N->getOpcode();
9596 SDValue N0 = N->getOperand(0);
9597 SDValue N1 = N->getOperand(1);
9598 EVT AssertVT = cast<VTSDNode>(N1)->getVT();
9600 // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
9601 if (N0.getOpcode() == Opcode &&
9602 AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
9603 return N0;
9605 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9606 N0.getOperand(0).getOpcode() == Opcode) {
9607 // We have an assert, truncate, assert sandwich. Make one stronger assert
9608 // by asserting on the smallest asserted type to the larger source type.
9609 // This eliminates the later assert:
9610 // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
9611 // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
9612 SDValue BigA = N0.getOperand(0);
9613 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9614 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9615 "Asserting zero/sign-extended bits to a type larger than the "
9616 "truncated destination does not provide information");
9618 SDLoc DL(N);
9619 EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
9620 SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
9621 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9622 BigA.getOperand(0), MinAssertVTVal);
9623 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
// If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
// than X, just move the AssertZext in front of the truncate and drop the
// AssertSext.
9629 if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
9630 N0.getOperand(0).getOpcode() == ISD::AssertSext &&
9631 Opcode == ISD::AssertZext) {
9632 SDValue BigA = N0.getOperand(0);
9633 EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
9634 assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
9635 "Asserting zero/sign-extended bits to a type larger than the "
9636 "truncated destination does not provide information");
9638 if (AssertVT.bitsLT(BigA_AssertVT)) {
9639 SDLoc DL(N);
9640 SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
9641 BigA.getOperand(0), N1);
9642 return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
9646 return SDValue();
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, where N is a multiple of the number of bits
/// in the narrower type, transform it to a narrower load from address + N /
/// (num bits of new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
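/// For example, on a little-endian target:
///   (i32 (trunc (srl (i64 (load p)), 32))) -> (i32 (load p+4))
/// since the shift amount (32) is a multiple of the narrow width (32).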
9655 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
9656 unsigned Opc = N->getOpcode();
9658 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
9659 SDValue N0 = N->getOperand(0);
9660 EVT VT = N->getValueType(0);
9661 EVT ExtVT = VT;
9663 // This transformation isn't valid for vector loads.
9664 if (VT.isVector())
9665 return SDValue();
9667 unsigned ShAmt = 0;
9668 bool HasShiftedOffset = false;
// Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT and then
// sign-extending back to VT.
9671 if (Opc == ISD::SIGN_EXTEND_INREG) {
9672 ExtType = ISD::SEXTLOAD;
9673 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
9674 } else if (Opc == ISD::SRL) {
// Another special case: SRL is basically zero-extending a narrower value,
// or it may be shifting a higher subword, half or byte into the lowest
// bits.
9678 ExtType = ISD::ZEXTLOAD;
9679 N0 = SDValue(N, 0);
9681 auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
9682 auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
9683 if (!N01 || !LN0)
9684 return SDValue();
9686 uint64_t ShiftAmt = N01->getZExtValue();
9687 uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
9688 if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
9689 ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
9690 else
9691 ExtVT = EVT::getIntegerVT(*DAG.getContext(),
9692 VT.getSizeInBits() - ShiftAmt);
9693 } else if (Opc == ISD::AND) {
9694 // An AND with a constant mask is the same as a truncate + zero-extend.
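// For example, (and (i32 (load p)), 0xffff) acts like
// (zext (trunc (load p) to i16) to i32), i.e. a zextload of the low
// 16 bits.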
9695 auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
9696 if (!AndC)
9697 return SDValue();
9699 const APInt &Mask = AndC->getAPIntValue();
9700 unsigned ActiveBits = 0;
9701 if (Mask.isMask()) {
9702 ActiveBits = Mask.countTrailingOnes();
9703 } else if (Mask.isShiftedMask()) {
9704 ShAmt = Mask.countTrailingZeros();
9705 APInt ShiftedMask = Mask.lshr(ShAmt);
9706 ActiveBits = ShiftedMask.countTrailingOnes();
9707 HasShiftedOffset = true;
9708 } else
9709 return SDValue();
9711 ExtType = ISD::ZEXTLOAD;
9712 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
9715 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
9716 SDValue SRL = N0;
9717 if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
9718 ShAmt = ConstShift->getZExtValue();
9719 unsigned EVTBits = ExtVT.getSizeInBits();
// Is the shift amount a multiple of the size of ExtVT?
9721 if ((ShAmt & (EVTBits-1)) == 0) {
9722 N0 = N0.getOperand(0);
// Is the load width a multiple of the size of ExtVT?
9724 if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
9725 return SDValue();
9728 // At this point, we must have a load or else we can't do the transform.
9729 if (!isa<LoadSDNode>(N0)) return SDValue();
9731 auto *LN0 = cast<LoadSDNode>(N0);
9733 // Because a SRL must be assumed to *need* to zero-extend the high bits
9734 // (as opposed to anyext the high bits), we can't combine the zextload
9735 // lowering of SRL and an sextload.
9736 if (LN0->getExtensionType() == ISD::SEXTLOAD)
9737 return SDValue();
9739 // If the shift amount is larger than the input type then we're not
9740 // accessing any of the loaded bytes. If the load was a zextload/extload
9741 // then the result of the shift+trunc is zero/undef (handled elsewhere).
9742 if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
9743 return SDValue();
9745 // If the SRL is only used by a masking AND, we may be able to adjust
9746 // the ExtVT to make the AND redundant.
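// For instance, if the SRL result only feeds (and X, 0xff), loading a
// single byte suffices; with the narrower ExtVT the AND becomes a no-op
// that later combines can remove.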
9747 SDNode *Mask = *(SRL->use_begin());
9748 if (Mask->getOpcode() == ISD::AND &&
9749 isa<ConstantSDNode>(Mask->getOperand(1))) {
9750 const APInt &ShiftMask =
9751 cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
9752 if (ShiftMask.isMask()) {
9753 EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
9754 ShiftMask.countTrailingOnes());
9755 // If the mask is smaller, recompute the type.
9756 if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
9757 TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
9758 ExtVT = MaskedVT;
9764 // If the load is shifted left (and the result isn't shifted back right),
9765 // we can fold the truncate through the shift.
9766 unsigned ShLeftAmt = 0;
9767 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
9768 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
9769 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
9770 ShLeftAmt = N01->getZExtValue();
9771 N0 = N0.getOperand(0);
9775 // If we haven't found a load, we can't narrow it.
9776 if (!isa<LoadSDNode>(N0))
9777 return SDValue();
9779 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9780 if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
9781 return SDValue();
9783 auto AdjustBigEndianShift = [&](unsigned ShAmt) {
9784 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
9785 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
9786 return LVTStoreBits - EVTStoreBits - ShAmt;
9789 // For big endian targets, we need to adjust the offset to the pointer to
9790 // load the correct bytes.
9791 if (DAG.getDataLayout().isBigEndian())
9792 ShAmt = AdjustBigEndianShift(ShAmt);
9794 EVT PtrType = N0.getOperand(1).getValueType();
9795 uint64_t PtrOff = ShAmt / 8;
9796 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
9797 SDLoc DL(LN0);
9798 // The original load itself didn't wrap, so an offset within it doesn't.
9799 SDNodeFlags Flags;
9800 Flags.setNoUnsignedWrap(true);
9801 SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
9802 PtrType, LN0->getBasePtr(),
9803 DAG.getConstant(PtrOff, DL, PtrType),
9804 Flags);
9805 AddToWorklist(NewPtr.getNode());
9807 SDValue Load;
9808 if (ExtType == ISD::NON_EXTLOAD)
9809 Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
9810 LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
9811 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9812 else
9813 Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
9814 LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
9815 NewAlign, LN0->getMemOperand()->getFlags(),
9816 LN0->getAAInfo());
9818 // Replace the old load's chain with the new load's chain.
9819 WorklistRemover DeadNodes(*this);
9820 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
9822 // Shift the result left, if we've swallowed a left shift.
9823 SDValue Result = Load;
9824 if (ShLeftAmt != 0) {
9825 EVT ShImmTy = getShiftAmountTy(Result.getValueType());
9826 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
9827 ShImmTy = VT;
9828 // If the shift amount is as large as the result size (but, presumably,
9829 // no larger than the source) then the useful bits of the result are
9830 // zero; we can't simply return the shortened shift, because the result
9831 // of that operation is undefined.
9832 SDLoc DL(N0);
9833 if (ShLeftAmt >= VT.getSizeInBits())
9834 Result = DAG.getConstant(0, DL, VT);
9835 else
9836 Result = DAG.getNode(ISD::SHL, DL, VT,
9837 Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
9840 if (HasShiftedOffset) {
9841 // Recalculate the shift amount after it has been altered to calculate
9842 // the offset.
9843 if (DAG.getDataLayout().isBigEndian())
9844 ShAmt = AdjustBigEndianShift(ShAmt);
// We're using a shifted mask, so the load now has an offset. This means
// that data has been loaded into lower bytes than it would have been
// before, so we need to shl the loaded data into the correct position in
// the register.
9850 SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
9851 Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
9852 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
9855 // Return the new loaded value.
9856 return Result;
9859 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
9860 SDValue N0 = N->getOperand(0);
9861 SDValue N1 = N->getOperand(1);
9862 EVT VT = N->getValueType(0);
9863 EVT EVT = cast<VTSDNode>(N1)->getVT();
9864 unsigned VTBits = VT.getScalarSizeInBits();
9865 unsigned EVTBits = EVT.getScalarSizeInBits();
9867 if (N0.isUndef())
9868 return DAG.getUNDEF(VT);
9870 // fold (sext_in_reg c1) -> c1
9871 if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
9872 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
9874 // If the input is already sign extended, just drop the extension.
9875 if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
9876 return N0;
9878 // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
9879 if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
9880 EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
9881 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9882 N0.getOperand(0), N1);
9884 // fold (sext_in_reg (sext x)) -> (sext x)
9885 // fold (sext_in_reg (aext x)) -> (sext x)
9886 // if x is small enough or if we know that x has more than 1 sign bit and the
9887 // sign_extend_inreg is extending from one of them.
9888 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
9889 SDValue N00 = N0.getOperand(0);
9890 unsigned N00Bits = N00.getScalarValueSizeInBits();
9891 if ((N00Bits <= EVTBits ||
9892 (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
9893 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
9894 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9897 // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
9898 if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
9899 N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
9900 N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
9901 N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
9902 if (!LegalOperations ||
9903 TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
9904 return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
9905 N0.getOperand(0));
9908 // fold (sext_in_reg (zext x)) -> (sext x)
9909 // iff we are extending the source sign bit.
9910 if (N0.getOpcode() == ISD::ZERO_EXTEND) {
9911 SDValue N00 = N0.getOperand(0);
9912 if (N00.getScalarValueSizeInBits() == EVTBits &&
9913 (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
9917 // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
9918 if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
9919 return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
9921 // fold operands of sext_in_reg based on knowledge that the top bits are not
9922 // demanded.
9923 if (SimplifyDemandedBits(SDValue(N, 0)))
9924 return SDValue(N, 0);
9926 // fold (sext_in_reg (load x)) -> (smaller sextload x)
9927 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
9928 if (SDValue NarrowLoad = ReduceLoadWidth(N))
9929 return NarrowLoad;
9931 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
9932 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
9933 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
9934 if (N0.getOpcode() == ISD::SRL) {
9935 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
9936 if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
9937 // We can turn this into an SRA iff the input to the SRL is already sign
9938 // extended enough.
9939 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
9940 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
9941 return DAG.getNode(ISD::SRA, SDLoc(N), VT,
9942 N0.getOperand(0), N0.getOperand(1));
9946 // fold (sext_inreg (extload x)) -> (sextload x)
// If sextload is not supported by the target, we can only do the combine
// when the load has one use. Doing otherwise can block folding the extload
// with other extends that the target does support.
9950 if (ISD::isEXTLoad(N0.getNode()) &&
9951 ISD::isUNINDEXEDLoad(N0.getNode()) &&
9952 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9953 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
9954 N0.hasOneUse()) ||
9955 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9956 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9957 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9958 LN0->getChain(),
9959 LN0->getBasePtr(), EVT,
9960 LN0->getMemOperand());
9961 CombineTo(N, ExtLoad);
9962 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9963 AddToWorklist(ExtLoad.getNode());
9964 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9966 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
9967 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
9968 N0.hasOneUse() &&
9969 EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
9970 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
9971 TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
9972 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9973 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
9974 LN0->getChain(),
9975 LN0->getBasePtr(), EVT,
9976 LN0->getMemOperand());
9977 CombineTo(N, ExtLoad);
9978 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
9979 return SDValue(N, 0); // Return N so it doesn't get rechecked!
9982 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
9983 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
9984 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
9985 N0.getOperand(1), false))
9986 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
9987 BSwap, N1);
9990 return SDValue();
9993 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
9994 SDValue N0 = N->getOperand(0);
9995 EVT VT = N->getValueType(0);
9997 if (N0.isUndef())
9998 return DAG.getUNDEF(VT);
10000 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10001 return Res;
10003 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10004 return SDValue(N, 0);
10006 return SDValue();
10009 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10010 SDValue N0 = N->getOperand(0);
10011 EVT VT = N->getValueType(0);
10013 if (N0.isUndef())
10014 return DAG.getUNDEF(VT);
10016 if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10017 return Res;
10019 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10020 return SDValue(N, 0);
10022 return SDValue();
10025 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10026 SDValue N0 = N->getOperand(0);
10027 EVT VT = N->getValueType(0);
10028 EVT SrcVT = N0.getValueType();
10029 bool isLE = DAG.getDataLayout().isLittleEndian();
10031 // noop truncate
10032 if (SrcVT == VT)
10033 return N0;
10035 // fold (truncate (truncate x)) -> (truncate x)
10036 if (N0.getOpcode() == ISD::TRUNCATE)
10037 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10039 // fold (truncate c1) -> c1
10040 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10041 SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10042 if (C.getNode() != N)
10043 return C;
10046 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10047 if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10048 N0.getOpcode() == ISD::SIGN_EXTEND ||
10049 N0.getOpcode() == ISD::ANY_EXTEND) {
10050 // if the source is smaller than the dest, we still need an extend.
10051 if (N0.getOperand(0).getValueType().bitsLT(VT))
10052 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
// if the source is larger than the dest, then we just need the truncate.
10054 if (N0.getOperand(0).getValueType().bitsGT(VT))
10055 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10056 // if the source and dest are the same type, we can drop both the extend
10057 // and the truncate.
10058 return N0.getOperand(0);
10061 // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10062 if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10063 return SDValue();
10065 // Fold extract-and-trunc into a narrow extract. For example:
10066 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10067 // i32 y = TRUNCATE(i64 x)
10068 // -- becomes --
10069 // v16i8 b = BITCAST (v2i64 val)
10070 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
// Note: We only run this optimization after type legalization (which often
// creates this pattern) and before operation legalization, after which
// we need to be more careful about the vector instructions that we generate.
10075 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10076 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10077 EVT VecTy = N0.getOperand(0).getValueType();
10078 EVT ExTy = N0.getValueType();
10079 EVT TrTy = N->getValueType(0);
10081 unsigned NumElem = VecTy.getVectorNumElements();
10082 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10084 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10085 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10087 SDValue EltNo = N0->getOperand(1);
10088 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10089 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10090 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10091 int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10093 SDLoc DL(N);
10094 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10095 DAG.getBitcast(NVT, N0.getOperand(0)),
10096 DAG.getConstant(Index, DL, IndexTy));
10100 // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10101 if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10102 if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10103 TLI.isTruncateFree(SrcVT, VT)) {
10104 SDLoc SL(N0);
10105 SDValue Cond = N0.getOperand(0);
10106 SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10107 SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10108 return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10112 // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
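// For example, (trunc (shl i64 X, 4) to i32) -> (shl (trunc X to i32), 4),
// which is safe because the known shift amount 4 is below the 32-bit
// scalar width.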
10113 if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10114 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
10115 TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10116 SDValue Amt = N0.getOperand(1);
10117 KnownBits Known = DAG.computeKnownBits(Amt);
10118 unsigned Size = VT.getScalarSizeInBits();
10119 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10120 SDLoc SL(N);
10121 EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10123 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10124 if (AmtVT != Amt.getValueType()) {
10125 Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10126 AddToWorklist(Amt.getNode());
10128 return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10132 // Attempt to pre-truncate BUILD_VECTOR sources.
10133 if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10134 TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10135 SDLoc DL(N);
10136 EVT SVT = VT.getScalarType();
10137 SmallVector<SDValue, 8> TruncOps;
10138 for (const SDValue &Op : N0->op_values()) {
10139 SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10140 TruncOps.push_back(TruncOp);
10142 return DAG.getBuildVector(VT, DL, TruncOps);
10145 // Fold a series of buildvector, bitcast, and truncate if possible.
10146 // For example fold
10147 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10148 // (2xi32 (buildvector x, y)).
10149 if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10150 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10151 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10152 N0.getOperand(0).hasOneUse()) {
10153 SDValue BuildVect = N0.getOperand(0);
10154 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10155 EVT TruncVecEltTy = VT.getVectorElementType();
10157 // Check that the element types match.
10158 if (BuildVectEltTy == TruncVecEltTy) {
10159 // Now we only need to compute the offset of the truncated elements.
10160 unsigned BuildVecNumElts = BuildVect.getNumOperands();
10161 unsigned TruncVecNumElts = VT.getVectorNumElements();
10162 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10164 assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10165 "Invalid number of elements");
10167 SmallVector<SDValue, 8> Opnds;
10168 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10169 Opnds.push_back(BuildVect.getOperand(i));
10171 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10175 // See if we can simplify the input to this truncate through knowledge that
10176 // only the low bits are being used.
// For example "trunc (or (shl x, 8), y)" -> trunc y.
10178 // Currently we only perform this optimization on scalars because vectors
10179 // may have different active low bits.
10180 if (!VT.isVector()) {
10181 APInt Mask =
10182 APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10183 if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10184 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10187 // fold (truncate (load x)) -> (smaller load x)
10188 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10189 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10190 if (SDValue Reduced = ReduceLoadWidth(N))
10191 return Reduced;
10193 // Handle the case where the load remains an extending load even
10194 // after truncation.
10195 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10196 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10197 if (!LN0->isVolatile() &&
10198 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10199 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10200 VT, LN0->getChain(), LN0->getBasePtr(),
10201 LN0->getMemoryVT(),
10202 LN0->getMemOperand());
10203 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10204 return NewLoad;
// fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...),
// where ... are all 'undef'.
10211 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10212 SmallVector<EVT, 8> VTs;
10213 SDValue V;
10214 unsigned Idx = 0;
10215 unsigned NumDefs = 0;
10217 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10218 SDValue X = N0.getOperand(i);
10219 if (!X.isUndef()) {
10220 V = X;
10221 Idx = i;
10222 NumDefs++;
// Stop if more than one member is non-undef.
10225 if (NumDefs > 1)
10226 break;
10227 VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10228 VT.getVectorElementType(),
10229 X.getValueType().getVectorNumElements()));
10232 if (NumDefs == 0)
10233 return DAG.getUNDEF(VT);
10235 if (NumDefs == 1) {
10236 assert(V.getNode() && "The single defined operand is empty!");
10237 SmallVector<SDValue, 8> Opnds;
10238 for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10239 if (i != Idx) {
10240 Opnds.push_back(DAG.getUNDEF(VTs[i]));
10241 continue;
10243 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10244 AddToWorklist(NV.getNode());
10245 Opnds.push_back(NV);
10247 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10251 // Fold truncate of a bitcast of a vector to an extract of the low vector
10252 // element.
10254 // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10255 if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10256 SDValue VecSrc = N0.getOperand(0);
10257 EVT SrcVT = VecSrc.getValueType();
10258 if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10259 (!LegalOperations ||
10260 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10261 SDLoc SL(N);
10263 EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10264 unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10265 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10266 VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10270 // Simplify the operands using demanded-bits information.
10271 if (!VT.isVector() &&
10272 SimplifyDemandedBits(SDValue(N, 0)))
10273 return SDValue(N, 0);
10275 // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10276 // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10277 // When the adde's carry is not used.
10278 if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10279 N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
// We only do this for ADDCARRY before operation legalization.
10281 ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10282 TLI.isOperationLegal(N0.getOpcode(), VT))) {
10283 SDLoc SL(N);
10284 auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10285 auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10286 auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10287 return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10290 // fold (truncate (extract_subvector(ext x))) ->
10291 // (extract_subvector x)
10292 // TODO: This can be generalized to cover cases where the truncate and extract
10293 // do not fully cancel each other out.
10294 if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10295 SDValue N00 = N0.getOperand(0);
10296 if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10297 N00.getOpcode() == ISD::ZERO_EXTEND ||
10298 N00.getOpcode() == ISD::ANY_EXTEND) {
10299 if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10300 VT.getVectorElementType())
10301 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10302 N00.getOperand(0), N0.getOperand(1));
10306 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10307 return NewVSel;
10309 // Narrow a suitable binary operation with a non-opaque constant operand by
10310 // moving it ahead of the truncate. This is limited to pre-legalization
10311 // because targets may prefer a wider type during later combines and invert
10312 // this transform.
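// Illustrative example, pre-legalization:
//   (trunc (add i64 X, 7) to i32) -> (add (trunc X to i32), 7)
// The constant 7 is non-opaque, so it can be truncated ahead of the add.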
10313 switch (N0.getOpcode()) {
10314 case ISD::ADD:
10315 case ISD::SUB:
10316 case ISD::MUL:
10317 case ISD::AND:
10318 case ISD::OR:
10319 case ISD::XOR:
10320 if (!LegalOperations && N0.hasOneUse() &&
10321 (isConstantOrConstantVector(N0.getOperand(0), true) ||
10322 isConstantOrConstantVector(N0.getOperand(1), true))) {
10323 // TODO: We already restricted this to pre-legalization, but for vectors
10324 // we are extra cautious to not create an unsupported operation.
10325 // Target-specific changes are likely needed to avoid regressions here.
10326 if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10327 SDLoc DL(N);
10328 SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10329 SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10330 return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10335 return SDValue();
10338 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10339 SDValue Elt = N->getOperand(i);
10340 if (Elt.getOpcode() != ISD::MERGE_VALUES)
10341 return Elt.getNode();
10342 return Elt.getOperand(Elt.getResNo()).getNode();
10345 /// build_pair (load, load) -> load
10346 /// if load locations are consecutive.
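/// For example, on a little-endian target, if LD1 loads the 4 bytes at p
/// and LD2 loads the 4 bytes at p+4, then
///   (build_pair (i32 LD1), (i32 LD2)) -> (i64 (load p))
/// provided the alignment and legality checks pass.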
10347 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10348 assert(N->getOpcode() == ISD::BUILD_PAIR);
10350 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10351 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
// A BUILD_PAIR always has the least significant part in elt 0 and the
// most significant part in elt 1. So when combining into one large load,
// we need to consider the endianness.
10356 if (DAG.getDataLayout().isBigEndian())
10357 std::swap(LD1, LD2);
10359 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10360 LD1->getAddressSpace() != LD2->getAddressSpace())
10361 return SDValue();
10362 EVT LD1VT = LD1->getValueType(0);
10363 unsigned LD1Bytes = LD1VT.getStoreSize();
10364 if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10365 DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10366 unsigned Align = LD1->getAlignment();
10367 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10368 VT.getTypeForEVT(*DAG.getContext()));
10370 if (NewAlign <= Align &&
10371 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10372 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10373 LD1->getPointerInfo(), Align);
10376 return SDValue();
10379 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10380 // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10381 // and Lo parts; on big-endian machines it doesn't.
10382 return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10385 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10386 const TargetLowering &TLI) {
10387 // If this is not a bitcast to an FP type or if the target doesn't have
10388 // IEEE754-compliant FP logic, we're done.
10389 EVT VT = N->getValueType(0);
10390 if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10391 return SDValue();
10393 // TODO: Handle cases where the integer constant is a different scalar
10394 // bitwidth to the FP.
10395 SDValue N0 = N->getOperand(0);
10396 EVT SourceVT = N0.getValueType();
10397 if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10398 return SDValue();
10400 unsigned FPOpcode;
10401 APInt SignMask;
10402 switch (N0.getOpcode()) {
10403 case ISD::AND:
10404 FPOpcode = ISD::FABS;
10405 SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10406 break;
10407 case ISD::XOR:
10408 FPOpcode = ISD::FNEG;
10409 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10410 break;
10411 case ISD::OR:
10412 FPOpcode = ISD::FABS;
10413 SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10414 break;
10415 default:
10416 return SDValue();
10419 // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10420 // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10421 // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10422 // fneg (fabs X)
10423 SDValue LogicOp0 = N0.getOperand(0);
10424 ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10425 if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10426 LogicOp0.getOpcode() == ISD::BITCAST &&
10427 LogicOp0.getOperand(0).getValueType() == VT) {
10428 SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10429 NumFPLogicOpsConv++;
10430 if (N0.getOpcode() == ISD::OR)
10431 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10432 return FPOp;
10435 return SDValue();
10438 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10439 SDValue N0 = N->getOperand(0);
10440 EVT VT = N->getValueType(0);
10442 if (N0.isUndef())
10443 return DAG.getUNDEF(VT);
10445 // If the input is a BUILD_VECTOR with all constant elements, fold this now.
// Only do this before legalize types, unless both types are integer and the
// scalar type is legal. Only do this before legalize ops, since the target
// may be depending on the bitcast.
10449 // First check to see if this is all constant.
10450 // TODO: Support FP bitcasts after legalize types.
10451 if (VT.isVector() &&
10452 (!LegalTypes ||
10453 (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
10454 TLI.isTypeLegal(VT.getVectorElementType()))) &&
10455 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10456 cast<BuildVectorSDNode>(N0)->isConstant())
10457 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10458 VT.getVectorElementType());
10460 // If the input is a constant, let getNode fold it.
10461 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
// If we can't allow illegal operations, we need to check that this is just
// an fp -> int or int -> fp conversion and that the resulting operation
// will be legal.
10465 if (!LegalOperations ||
10466 (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10467 TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10468 (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10469 TLI.isOperationLegal(ISD::Constant, VT))) {
10470 SDValue C = DAG.getBitcast(VT, N0);
10471 if (C.getNode() != N)
10472 return C;
10476 // (conv (conv x, t1), t2) -> (conv x, t2)
10477 if (N0.getOpcode() == ISD::BITCAST)
10478 return DAG.getBitcast(VT, N0.getOperand(0));
10480 // fold (conv (load x)) -> (load (conv*)x)
10481 // If the resultant load doesn't need a higher alignment than the original!
10482 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10483 // Do not remove the cast if the types differ in endian layout.
10484 TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10485 TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10486 // If the load is volatile, we only want to change the load type if the
10487 // resulting load is legal. Otherwise we might increase the number of
10488 // memory accesses. We don't care if the original type was legal or not
10489 // as we assume software couldn't rely on the number of accesses of an
10490 // illegal type.
10491 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10492 TLI.isOperationLegal(ISD::LOAD, VT)) &&
10493 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
10494 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10495 unsigned OrigAlign = LN0->getAlignment();
10497 bool Fast = false;
10498 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
10499 LN0->getAddressSpace(), OrigAlign, &Fast) &&
10500 Fast) {
10501 SDValue Load =
10502 DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10503 LN0->getPointerInfo(), OrigAlign,
10504 LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10505 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10506 return Load;
10510 if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10511 return V;
10513 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10514 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10516 // For ppc_fp128:
10517 // fold (bitcast (fneg x)) ->
10518 // flipbit = signbit
10519 // (xor (bitcast x) (build_pair flipbit, flipbit))
10521 // fold (bitcast (fabs x)) ->
10522 // flipbit = (and (extract_element (bitcast x), 0), signbit)
10523 // (xor (bitcast x) (build_pair flipbit, flipbit))
10524 // This often reduces constant pool loads.
10525 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
10526 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
10527 N0.getNode()->hasOneUse() && VT.isInteger() &&
10528 !VT.isVector() && !N0.getValueType().isVector()) {
10529 SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
10530 AddToWorklist(NewConv.getNode());
10532 SDLoc DL(N);
10533 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10534 assert(VT.getSizeInBits() == 128);
10535 SDValue SignBit = DAG.getConstant(
10536 APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
10537 SDValue FlipBit;
10538 if (N0.getOpcode() == ISD::FNEG) {
10539 FlipBit = SignBit;
10540 AddToWorklist(FlipBit.getNode());
10541 } else {
10542 assert(N0.getOpcode() == ISD::FABS);
10543 SDValue Hi =
10544 DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
10545 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10546 SDLoc(NewConv)));
10547 AddToWorklist(Hi.getNode());
10548 FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
10549 AddToWorklist(FlipBit.getNode());
10550 }
10551 SDValue FlipBits =
10552 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10553 AddToWorklist(FlipBits.getNode());
10554 return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
10555 }
10556 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10557 if (N0.getOpcode() == ISD::FNEG)
10558 return DAG.getNode(ISD::XOR, DL, VT,
10559 NewConv, DAG.getConstant(SignBit, DL, VT));
10560 assert(N0.getOpcode() == ISD::FABS);
10561 return DAG.getNode(ISD::AND, DL, VT,
10562 NewConv, DAG.getConstant(~SignBit, DL, VT));
10563 }
10565 // fold (bitconvert (fcopysign cst, x)) ->
10566 // (or (and (bitconvert x), sign), (and cst, (not sign)))
10567 // Note that we don't handle (copysign x, cst) because this can always be
10568 // folded to an fneg or fabs.
10570 // For ppc_fp128:
10571 // fold (bitcast (fcopysign cst, x)) ->
10572 // flipbit = (and (extract_element
10573 // (xor (bitcast cst), (bitcast x)), 0),
10574 // signbit)
10575 // (xor (bitcast cst) (build_pair flipbit, flipbit))
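// e.g., for f32 this turns (bitcast (fcopysign 1.0, x)) into
// (or (and (bitcast x), 0x80000000), 0x3f800000).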
10576 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
10577 isa<ConstantFPSDNode>(N0.getOperand(0)) &&
10578 VT.isInteger() && !VT.isVector()) {
10579 unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
10580 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
10581 if (isTypeLegal(IntXVT)) {
10582 SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
10583 AddToWorklist(X.getNode());
10585 // If X has a different width than the result/lhs, sext it or truncate it.
10586 unsigned VTWidth = VT.getSizeInBits();
10587 if (OrigXWidth < VTWidth) {
10588 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
10589 AddToWorklist(X.getNode());
10590 } else if (OrigXWidth > VTWidth) {
10591 // To get the sign bit in the right place, we have to shift it right
10592 // before truncating.
10593 SDLoc DL(X);
10594 X = DAG.getNode(ISD::SRL, DL,
10595 X.getValueType(), X,
10596 DAG.getConstant(OrigXWidth-VTWidth, DL,
10597 X.getValueType()));
10598 AddToWorklist(X.getNode());
10599 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
10600 AddToWorklist(X.getNode());
10601 }
10603 if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
10604 APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
10605 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10606 AddToWorklist(Cst.getNode());
10607 SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
10608 AddToWorklist(X.getNode());
10609 SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
10610 AddToWorklist(XorResult.getNode());
10611 SDValue XorResult64 = DAG.getNode(
10612 ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
10613 DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
10614 SDLoc(XorResult)));
10615 AddToWorklist(XorResult64.getNode());
10616 SDValue FlipBit =
10617 DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
10618 DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
10619 AddToWorklist(FlipBit.getNode());
10620 SDValue FlipBits =
10621 DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
10622 AddToWorklist(FlipBits.getNode());
10623 return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
10624 }
10625 APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
10626 X = DAG.getNode(ISD::AND, SDLoc(X), VT,
10627 X, DAG.getConstant(SignBit, SDLoc(X), VT));
10628 AddToWorklist(X.getNode());
10630 SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
10631 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
10632 Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
10633 AddToWorklist(Cst.getNode());
10635 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
10636 }
10637 }
10639 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
10640 if (N0.getOpcode() == ISD::BUILD_PAIR)
10641 if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
10642 return CombineLD;
10644 // Remove double bitcasts from shuffles - this is often a legacy of
10645 // XformToShuffleWithZero being used to combine bitmaskings (of
10646 // float vectors bitcast to integer vectors) into shuffles.
10647 // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
10648 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
10649 N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
10650 VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
10651 !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
10652 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
10654 // If an operand is a bitcast, peek through it when it casts from the original VT.
10655 // If an operand is a constant, just bitcast it back to the original VT.
10656 auto PeekThroughBitcast = [&](SDValue Op) {
10657 if (Op.getOpcode() == ISD::BITCAST &&
10658 Op.getOperand(0).getValueType() == VT)
10659 return SDValue(Op.getOperand(0));
10660 if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
10661 ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
10662 return DAG.getBitcast(VT, Op);
10663 return SDValue();
10664 };
10666 // FIXME: If either input vector is bitcast, try to convert the shuffle to
10667 // the result type of this bitcast. This would eliminate at least one
10668 // bitcast. See the transform in InstCombine.
10669 SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
10670 SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
10671 if (!(SV0 && SV1))
10672 return SDValue();
10674 int MaskScale =
10675 VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
10676 SmallVector<int, 8> NewMask;
10677 for (int M : SVN->getMask())
10678 for (int i = 0; i != MaskScale; ++i)
10679 NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
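// e.g., bitcasting a v2i64 shuffle with mask <1,0> to v4i32 scales the mask
// by MaskScale == 2, giving <2,3,0,1>.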
10681 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10682 if (!LegalMask) {
10683 std::swap(SV0, SV1);
10684 ShuffleVectorSDNode::commuteMask(NewMask);
10685 LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
10686 }
10688 if (LegalMask)
10689 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
10690 }
10692 return SDValue();
10693 }
10695 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
10696 EVT VT = N->getValueType(0);
10697 return CombineConsecutiveLoads(N, VT);
10698 }
10700 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
10701 /// operands. DstEltVT indicates the destination element value type.
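/// e.g., a v2i64 constant build_vector bitcast to v4i32 is refolded by
/// splitting each 64-bit constant into two 32-bit constants.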
10702 SDValue DAGCombiner::
10703 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
10704 EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
10706 // If this is already the right type, we're done.
10707 if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
10709 unsigned SrcBitSize = SrcEltVT.getSizeInBits();
10710 unsigned DstBitSize = DstEltVT.getSizeInBits();
10712 // If this is a conversion of N elements of one type to N elements of another
10713 // type, convert each element. This handles FP<->INT cases.
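// e.g., v2f32 -> v2i32 just bitcasts each of the two elements in place.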
10714 if (SrcBitSize == DstBitSize) {
10715 SmallVector<SDValue, 8> Ops;
10716 for (SDValue Op : BV->op_values()) {
10717 // If the vector element type is not legal, the BUILD_VECTOR operands
10718 // are promoted and implicitly truncated. Make that explicit here.
10719 if (Op.getValueType() != SrcEltVT)
10720 Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
10721 Ops.push_back(DAG.getBitcast(DstEltVT, Op));
10722 AddToWorklist(Ops.back().getNode());
10723 }
10724 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10725 BV->getValueType(0).getVectorNumElements());
10726 return DAG.getBuildVector(VT, SDLoc(BV), Ops);
10727 }
10729 // Otherwise, we're growing or shrinking the elements. To avoid having to
10730 // handle annoying details of growing/shrinking FP values, we convert them to
10731 // int first.
10732 if (SrcEltVT.isFloatingPoint()) {
10733 // Convert the input float vector to an int vector where the elements are
10734 // the same size.
10735 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
10736 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
10737 SrcEltVT = IntVT;
10738 }
10740 // Now we know the input is an integer vector. If the output is a FP type,
10741 // convert to integer first, then to FP of the right size.
10742 if (DstEltVT.isFloatingPoint()) {
10743 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
10744 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
10746 // Next, convert to FP elements of the same size.
10747 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
10748 }
10750 SDLoc DL(BV);
10752 // Okay, we know the src/dst types are both integers of differing widths.
10753 // Handle growing first.
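// e.g., growing i32 elements <0x00000001, 0x00000002> into i64 produces
// 0x0000000200000001 on a little-endian target (element 0 in the low half).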
10754 assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
10755 if (SrcBitSize < DstBitSize) {
10756 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
10758 SmallVector<SDValue, 8> Ops;
10759 for (unsigned i = 0, e = BV->getNumOperands(); i != e;
10760 i += NumInputsPerOutput) {
10761 bool isLE = DAG.getDataLayout().isLittleEndian();
10762 APInt NewBits = APInt(DstBitSize, 0);
10763 bool EltIsUndef = true;
10764 for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
10765 // Shift the previously computed bits over.
10766 NewBits <<= SrcBitSize;
10767 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
10768 if (Op.isUndef()) continue;
10769 EltIsUndef = false;
10771 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
10772 zextOrTrunc(SrcBitSize).zext(DstBitSize);
10773 }
10775 if (EltIsUndef)
10776 Ops.push_back(DAG.getUNDEF(DstEltVT));
10777 else
10778 Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
10779 }
10781 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
10782 return DAG.getBuildVector(VT, DL, Ops);
10783 }
10785 // Finally, this must be the case where we are shrinking elements: each input
10786 // turns into multiple outputs.
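// e.g., shrinking the i64 constant 0x0000000200000001 into i32 pieces yields
// <0x00000001, 0x00000002> on little-endian; the pieces are reversed below
// for big-endian targets.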
10787 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
10788 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
10789 NumOutputsPerInput*BV->getNumOperands());
10790 SmallVector<SDValue, 8> Ops;
10792 for (const SDValue &Op : BV->op_values()) {
10793 if (Op.isUndef()) {
10794 Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
10795 continue;
10796 }
10798 APInt OpVal = cast<ConstantSDNode>(Op)->
10799 getAPIntValue().zextOrTrunc(SrcBitSize);
10801 for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
10802 APInt ThisVal = OpVal.trunc(DstBitSize);
10803 Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
10804 OpVal.lshrInPlace(DstBitSize);
10805 }
10807 // For big endian targets, swap the order of the pieces of each element.
10808 if (DAG.getDataLayout().isBigEndian())
10809 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
10810 }
10812 return DAG.getBuildVector(VT, DL, Ops);
10813 }
10815 static bool isContractable(SDNode *N) {
10816 SDNodeFlags F = N->getFlags();
10817 return F.hasAllowContract() || F.hasAllowReassociation();
10818 }
10820 /// Try to perform FMA combining on a given FADD node.
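/// e.g., (fadd (fmul x, y), z) can fuse to (fma x, y, z) when contraction is
/// allowed.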
10821 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
10822 SDValue N0 = N->getOperand(0);
10823 SDValue N1 = N->getOperand(1);
10824 EVT VT = N->getValueType(0);
10825 SDLoc SL(N);
10827 const TargetOptions &Options = DAG.getTarget().Options;
10829 // Floating-point multiply-add with intermediate rounding.
10830 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
10832 // Floating-point multiply-add without intermediate rounding.
10833 bool HasFMA =
10834 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
10835 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
10837 // No valid opcode, do not combine.
10838 if (!HasFMAD && !HasFMA)
10839 return SDValue();
10841 SDNodeFlags Flags = N->getFlags();
10842 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
10843 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
10844 CanFuse || HasFMAD);
10845 // If the addition is not contractable, do not combine.
10846 if (!AllowFusionGlobally && !isContractable(N))
10847 return SDValue();
10849 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
10850 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
10851 return SDValue();
10853 // Always prefer FMAD to FMA for precision.
10854 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
10855 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
10857 // Is the node an FMUL and contractable either due to global flags or
10858 // SDNodeFlags.
10859 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
10860 if (N.getOpcode() != ISD::FMUL)
10861 return false;
10862 return AllowFusionGlobally || isContractable(N.getNode());
10863 };
10864 // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
10865 // prefer to fold the multiply with fewer uses.
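// (The multiply with fewer other uses is more likely to become dead once it
// is fused into the FMA.)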
10866 if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
10867 if (N0.getNode()->use_size() > N1.getNode()->use_size())
10868 std::swap(N0, N1);
10869 }
10871 // fold (fadd (fmul x, y), z) -> (fma x, y, z)
10872 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
10873 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10874 N0.getOperand(0), N0.getOperand(1), N1, Flags);
10875 }
10877 // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
10878 // Note: Commutes FADD operands.
10879 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
10880 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10881 N1.getOperand(0), N1.getOperand(1), N0, Flags);
10882 }
10884 // Look through FP_EXTEND nodes to do more combining.
10886 // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
10887 if (N0.getOpcode() == ISD::FP_EXTEND) {
10888 SDValue N00 = N0.getOperand(0);
10889 if (isContractableFMUL(N00) &&
10890 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10891 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10892 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10893 N00.getOperand(0)),
10894 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10895 N00.getOperand(1)), N1, Flags);
10896 }
10897 }
10899 // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
10900 // Note: Commutes FADD operands.
10901 if (N1.getOpcode() == ISD::FP_EXTEND) {
10902 SDValue N10 = N1.getOperand(0);
10903 if (isContractableFMUL(N10) &&
10904 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
10905 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10906 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10907 N10.getOperand(0)),
10908 DAG.getNode(ISD::FP_EXTEND, SL, VT,
10909 N10.getOperand(1)), N0, Flags);
10910 }
10911 }
10913 // More folding opportunities when target permits.
10914 if (Aggressive) {
10915 // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
10916 if (CanFuse &&
10917 N0.getOpcode() == PreferredFusedOpcode &&
10918 N0.getOperand(2).getOpcode() == ISD::FMUL &&
10919 N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
10920 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10921 N0.getOperand(0), N0.getOperand(1),
10922 DAG.getNode(PreferredFusedOpcode, SL, VT,
10923 N0.getOperand(2).getOperand(0),
10924 N0.getOperand(2).getOperand(1),
10925 N1, Flags), Flags);
10926 }
10928 // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
10929 if (CanFuse &&
10930 N1->getOpcode() == PreferredFusedOpcode &&
10931 N1.getOperand(2).getOpcode() == ISD::FMUL &&
10932 N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
10933 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10934 N1.getOperand(0), N1.getOperand(1),
10935 DAG.getNode(PreferredFusedOpcode, SL, VT,
10936 N1.getOperand(2).getOperand(0),
10937 N1.getOperand(2).getOperand(1),
10938 N0, Flags), Flags);
10939 }
10942 // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
10943 // -> (fma x, y, (fma (fpext u), (fpext v), z))
10944 auto FoldFAddFMAFPExtFMul = [&] (
10945 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10946 SDNodeFlags Flags) {
10947 return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
10948 DAG.getNode(PreferredFusedOpcode, SL, VT,
10949 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10950 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10951 Z, Flags), Flags);
10952 };
10953 if (N0.getOpcode() == PreferredFusedOpcode) {
10954 SDValue N02 = N0.getOperand(2);
10955 if (N02.getOpcode() == ISD::FP_EXTEND) {
10956 SDValue N020 = N02.getOperand(0);
10957 if (isContractableFMUL(N020) &&
10958 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
10959 return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
10960 N020.getOperand(0), N020.getOperand(1),
10961 N1, Flags);
10962 }
10963 }
10964 }
10966 // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
10967 // -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
10968 // FIXME: This turns two single-precision and one double-precision
10969 // operation into two double-precision operations, which might not be
10970 // interesting for all targets, especially GPUs.
10971 auto FoldFAddFPExtFMAFMul = [&] (
10972 SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
10973 SDNodeFlags Flags) {
10974 return DAG.getNode(PreferredFusedOpcode, SL, VT,
10975 DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
10976 DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
10977 DAG.getNode(PreferredFusedOpcode, SL, VT,
10978 DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
10979 DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
10980 Z, Flags), Flags);
10981 };
10982 if (N0.getOpcode() == ISD::FP_EXTEND) {
10983 SDValue N00 = N0.getOperand(0);
10984 if (N00.getOpcode() == PreferredFusedOpcode) {
10985 SDValue N002 = N00.getOperand(2);
10986 if (isContractableFMUL(N002) &&
10987 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
10988 return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
10989 N002.getOperand(0), N002.getOperand(1),
10990 N1, Flags);
10991 }
10992 }
10993 }
10995 // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
10996 // -> (fma y, z, (fma (fpext u), (fpext v), x))
10997 if (N1.getOpcode() == PreferredFusedOpcode) {
10998 SDValue N12 = N1.getOperand(2);
10999 if (N12.getOpcode() == ISD::FP_EXTEND) {
11000 SDValue N120 = N12.getOperand(0);
11001 if (isContractableFMUL(N120) &&
11002 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11003 return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11004 N120.getOperand(0), N120.getOperand(1),
11005 N0, Flags);
11006 }
11007 }
11008 }
11010 // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
11011 // -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11012 // FIXME: This turns two single-precision and one double-precision
11013 // operation into two double-precision operations, which might not be
11014 // interesting for all targets, especially GPUs.
11015 if (N1.getOpcode() == ISD::FP_EXTEND) {
11016 SDValue N10 = N1.getOperand(0);
11017 if (N10.getOpcode() == PreferredFusedOpcode) {
11018 SDValue N102 = N10.getOperand(2);
11019 if (isContractableFMUL(N102) &&
11020 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11021 return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11022 N102.getOperand(0), N102.getOperand(1),
11023 N0, Flags);
11024 }
11025 }
11026 }
11027 }
11029 return SDValue();
11030 }
11032 /// Try to perform FMA combining on a given FSUB node.
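/// e.g., (fsub (fmul x, y), z) can fuse to (fma x, y, (fneg z)) when
/// contraction is allowed.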
11033 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11034 SDValue N0 = N->getOperand(0);
11035 SDValue N1 = N->getOperand(1);
11036 EVT VT = N->getValueType(0);
11037 SDLoc SL(N);
11039 const TargetOptions &Options = DAG.getTarget().Options;
11040 // Floating-point multiply-add with intermediate rounding.
11041 bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11043 // Floating-point multiply-add without intermediate rounding.
11044 bool HasFMA =
11045 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11046 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11048 // No valid opcode, do not combine.
11049 if (!HasFMAD && !HasFMA)
11050 return SDValue();
11052 const SDNodeFlags Flags = N->getFlags();
11053 bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11054 bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11055 CanFuse || HasFMAD);
11057 // If the subtraction is not contractable, do not combine.
11058 if (!AllowFusionGlobally && !isContractable(N))
11059 return SDValue();
11061 const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11062 if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11063 return SDValue();
11065 // Always prefer FMAD to FMA for precision.
11066 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11067 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11069 // Is the node an FMUL and contractable either due to global flags or
11070 // SDNodeFlags.
11071 auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11072 if (N.getOpcode() != ISD::FMUL)
11073 return false;
11074 return AllowFusionGlobally || isContractable(N.getNode());
11075 };
11077 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11078 if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11079 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11080 N0.getOperand(0), N0.getOperand(1),
11081 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11082 }
11084 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11085 // Note: Commutes FSUB operands.
11086 if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11087 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11088 DAG.getNode(ISD::FNEG, SL, VT,
11089 N1.getOperand(0)),
11090 N1.getOperand(1), N0, Flags);
11091 }
11093 // fold (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
11094 if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11095 (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11096 SDValue N00 = N0.getOperand(0).getOperand(0);
11097 SDValue N01 = N0.getOperand(0).getOperand(1);
11098 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11099 DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11100 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11101 }
11103 // Look through FP_EXTEND nodes to do more combining.
11105 // fold (fsub (fpext (fmul x, y)), z)
11106 // -> (fma (fpext x), (fpext y), (fneg z))
11107 if (N0.getOpcode() == ISD::FP_EXTEND) {
11108 SDValue N00 = N0.getOperand(0);
11109 if (isContractableFMUL(N00) &&
11110 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11111 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11112 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11113 N00.getOperand(0)),
11114 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11115 N00.getOperand(1)),
11116 DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11117 }
11118 }
11120 // fold (fsub x, (fpext (fmul y, z)))
11121 // -> (fma (fneg (fpext y)), (fpext z), x)
11122 // Note: Commutes FSUB operands.
11123 if (N1.getOpcode() == ISD::FP_EXTEND) {
11124 SDValue N10 = N1.getOperand(0);
11125 if (isContractableFMUL(N10) &&
11126 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11127 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11128 DAG.getNode(ISD::FNEG, SL, VT,
11129 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11130 N10.getOperand(0))),
11131 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11132 N10.getOperand(1)),
11133 N0, Flags);
11134 }
11135 }
11137 // fold (fsub (fpext (fneg (fmul x, y))), z)
11138 // -> (fneg (fma (fpext x), (fpext y), z))
11139 // Note: This could be removed with appropriate canonicalization of the
11140 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11141 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11142 // us from implementing the canonicalization in visitFSUB.
11143 if (N0.getOpcode() == ISD::FP_EXTEND) {
11144 SDValue N00 = N0.getOperand(0);
11145 if (N00.getOpcode() == ISD::FNEG) {
11146 SDValue N000 = N00.getOperand(0);
11147 if (isContractableFMUL(N000) &&
11148 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11149 return DAG.getNode(ISD::FNEG, SL, VT,
11150 DAG.getNode(PreferredFusedOpcode, SL, VT,
11151 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11152 N000.getOperand(0)),
11153 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11154 N000.getOperand(1)),
11155 N1, Flags));
11156 }
11157 }
11158 }
11160 // fold (fsub (fneg (fpext (fmul x, y))), z)
11161 // -> (fneg (fma (fpext x), (fpext y), z))
11162 // Note: This could be removed with appropriate canonicalization of the
11163 // input expression into (fneg (fadd (fpext (fmul x, y)), z)). However, the
11164 // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11165 // us from implementing the canonicalization in visitFSUB.
11166 if (N0.getOpcode() == ISD::FNEG) {
11167 SDValue N00 = N0.getOperand(0);
11168 if (N00.getOpcode() == ISD::FP_EXTEND) {
11169 SDValue N000 = N00.getOperand(0);
11170 if (isContractableFMUL(N000) &&
11171 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11172 return DAG.getNode(ISD::FNEG, SL, VT,
11173 DAG.getNode(PreferredFusedOpcode, SL, VT,
11174 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11175 N000.getOperand(0)),
11176 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11177 N000.getOperand(1)),
11178 N1, Flags));
11179 }
11180 }
11181 }
11183 // More folding opportunities when target permits.
11184 if (Aggressive) {
11185 // fold (fsub (fma x, y, (fmul u, v)), z)
11186 // -> (fma x, y, (fma u, v, (fneg z)))
11187 if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11188 isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11189 N0.getOperand(2)->hasOneUse()) {
11190 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11191 N0.getOperand(0), N0.getOperand(1),
11192 DAG.getNode(PreferredFusedOpcode, SL, VT,
11193 N0.getOperand(2).getOperand(0),
11194 N0.getOperand(2).getOperand(1),
11195 DAG.getNode(ISD::FNEG, SL, VT,
11196 N1), Flags), Flags);
11197 }
11199 // fold (fsub x, (fma y, z, (fmul u, v)))
11200 // -> (fma (fneg y), z, (fma (fneg u), v, x))
11201 if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11202 isContractableFMUL(N1.getOperand(2))) {
11203 SDValue N20 = N1.getOperand(2).getOperand(0);
11204 SDValue N21 = N1.getOperand(2).getOperand(1);
11205 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11206 DAG.getNode(ISD::FNEG, SL, VT,
11207 N1.getOperand(0)),
11208 N1.getOperand(1),
11209 DAG.getNode(PreferredFusedOpcode, SL, VT,
11210 DAG.getNode(ISD::FNEG, SL, VT, N20),
11211 N21, N0, Flags), Flags);
11212 }
11215 // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11216 // -> (fma x, y, (fma (fpext u), (fpext v), (fneg z)))
11217 if (N0.getOpcode() == PreferredFusedOpcode) {
11218 SDValue N02 = N0.getOperand(2);
11219 if (N02.getOpcode() == ISD::FP_EXTEND) {
11220 SDValue N020 = N02.getOperand(0);
11221 if (isContractableFMUL(N020) &&
11222 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11223 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11224 N0.getOperand(0), N0.getOperand(1),
11225 DAG.getNode(PreferredFusedOpcode, SL, VT,
11226 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11227 N020.getOperand(0)),
11228 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11229 N020.getOperand(1)),
11230 DAG.getNode(ISD::FNEG, SL, VT,
11231 N1), Flags), Flags);
11232 }
11233 }
11234 }
11236 // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11237 // -> (fma (fpext x), (fpext y),
11238 // (fma (fpext u), (fpext v), (fneg z)))
11239 // FIXME: This turns two single-precision and one double-precision
11240 // operation into two double-precision operations, which might not be
11241 // interesting for all targets, especially GPUs.
11242 if (N0.getOpcode() == ISD::FP_EXTEND) {
11243 SDValue N00 = N0.getOperand(0);
11244 if (N00.getOpcode() == PreferredFusedOpcode) {
11245 SDValue N002 = N00.getOperand(2);
11246 if (isContractableFMUL(N002) &&
11247 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11248 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11249 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11250 N00.getOperand(0)),
11251 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11252 N00.getOperand(1)),
11253 DAG.getNode(PreferredFusedOpcode, SL, VT,
11254 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11255 N002.getOperand(0)),
11256 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11257 N002.getOperand(1)),
11258 DAG.getNode(ISD::FNEG, SL, VT,
11259 N1), Flags), Flags);
11260 }
11261 }
11262 }
11264 // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11265 // -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11266 if (N1.getOpcode() == PreferredFusedOpcode &&
11267 N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11268 SDValue N120 = N1.getOperand(2).getOperand(0);
11269 if (isContractableFMUL(N120) &&
11270 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11271 SDValue N1200 = N120.getOperand(0);
11272 SDValue N1201 = N120.getOperand(1);
11273 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11274 DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11275 N1.getOperand(1),
11276 DAG.getNode(PreferredFusedOpcode, SL, VT,
11277 DAG.getNode(ISD::FNEG, SL, VT,
11278 DAG.getNode(ISD::FP_EXTEND, SL,
11279 VT, N1200)),
11280 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11281 N1201),
11282 N0, Flags), Flags);
11283 }
11284 }
11286 // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11287 // -> (fma (fneg (fpext y)), (fpext z),
11288 // (fma (fneg (fpext u)), (fpext v), x))
11289 // FIXME: This turns two single-precision and one double-precision
11290 // operation into two double-precision operations, which might not be
11291 // interesting for all targets, especially GPUs.
11292 if (N1.getOpcode() == ISD::FP_EXTEND &&
11293 N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11294 SDValue CvtSrc = N1.getOperand(0);
11295 SDValue N100 = CvtSrc.getOperand(0);
11296 SDValue N101 = CvtSrc.getOperand(1);
11297 SDValue N102 = CvtSrc.getOperand(2);
11298 if (isContractableFMUL(N102) &&
11299 TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11300 SDValue N1020 = N102.getOperand(0);
11301 SDValue N1021 = N102.getOperand(1);
11302 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11303 DAG.getNode(ISD::FNEG, SL, VT,
11304 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11305 N100)),
11306 DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11307 DAG.getNode(PreferredFusedOpcode, SL, VT,
11308 DAG.getNode(ISD::FNEG, SL, VT,
11309 DAG.getNode(ISD::FP_EXTEND, SL,
11310 VT, N1020)),
11311 DAG.getNode(ISD::FP_EXTEND, SL, VT,
11312 N1021),
11313 N0, Flags), Flags);
11314 }
11315 }
11316 }
11318 return SDValue();
11319 }
11321 /// Try to perform FMA combining on a given FMUL node based on the distributive
11322 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11323 /// subtraction instead of addition).
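/// e.g., (fmul (fadd x, +1.0), y) becomes (fma x, y, y).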
11324 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11325 SDValue N0 = N->getOperand(0);
11326 SDValue N1 = N->getOperand(1);
11327 EVT VT = N->getValueType(0);
11328 SDLoc SL(N);
11329 const SDNodeFlags Flags = N->getFlags();
11331 assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11333 const TargetOptions &Options = DAG.getTarget().Options;
11335 // The transforms below are incorrect when x == 0 and y == inf, because the
11336 // intermediate multiplication produces a nan.
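// e.g., (fmul (fadd x, +1.0), y) with x == 0.0 and y == inf evaluates to inf,
// but the fused (fma x, y, y) first computes 0.0 * inf == NaN.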
11337 if (!Options.NoInfsFPMath)
11338 return SDValue();
11340 // Floating-point multiply-add without intermediate rounding.
11341 bool HasFMA =
11342 (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11343 TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11344 (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11346 // Floating-point multiply-add with intermediate rounding. This can result
11347 // in a less precise result due to the changed rounding order.
11348 bool HasFMAD = Options.UnsafeFPMath &&
11349 (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11351 // No valid opcode, do not combine.
11352 if (!HasFMAD && !HasFMA)
11353 return SDValue();
11355 // Always prefer FMAD to FMA for precision.
11356 unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11357 bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11359 // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11360 // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11361 auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11362 if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11363 if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11364 if (C->isExactlyValue(+1.0))
11365 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11366 Y, Flags);
11367 if (C->isExactlyValue(-1.0))
11368 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11369 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11370 }
11371 }
11372 return SDValue();
11373 };
11375 if (SDValue FMA = FuseFADD(N0, N1, Flags))
11376 return FMA;
11377 if (SDValue FMA = FuseFADD(N1, N0, Flags))
11378 return FMA;
11380 // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11381 // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11382 // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11383 // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11384 auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11385 if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11386 if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11387 if (C0->isExactlyValue(+1.0))
11388 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11389 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11390 Y, Flags);
11391 if (C0->isExactlyValue(-1.0))
11392 return DAG.getNode(PreferredFusedOpcode, SL, VT,
11393 DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11394 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11395 }
11396 if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11397 if (C1->isExactlyValue(+1.0))
11398 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11399 DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11400 if (C1->isExactlyValue(-1.0))
11401 return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11402 Y, Flags);
11403 }
11404 }
11405 return SDValue();
11406 };
11408 if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11409 return FMA;
11410 if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11411 return FMA;
11413 return SDValue();
11414 }
11416 SDValue DAGCombiner::visitFADD(SDNode *N) {
11417 SDValue N0 = N->getOperand(0);
11418 SDValue N1 = N->getOperand(1);
11419 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11420 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11421 EVT VT = N->getValueType(0);
11422 SDLoc DL(N);
11423 const TargetOptions &Options = DAG.getTarget().Options;
11424 const SDNodeFlags Flags = N->getFlags();
11426 // fold vector ops
11427 if (VT.isVector())
11428 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11429 return FoldedVOp;
11431 // fold (fadd c1, c2) -> c1 + c2
11432 if (N0CFP && N1CFP)
11433 return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11435 // canonicalize constant to RHS
11436 if (N0CFP && !N1CFP)
11437 return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11439 // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11440 ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11441 if (N1C && N1C->isZero())
11442 if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
11443 return N0;
11445 if (SDValue NewSel = foldBinOpIntoSelect(N))
11446 return NewSel;
11448 // fold (fadd A, (fneg B)) -> (fsub A, B)
11449 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11450 isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11451 return DAG.getNode(ISD::FSUB, DL, VT, N0,
11452 GetNegatedExpression(N1, DAG, LegalOperations,
11453 ForCodeSize), Flags);
11455 // fold (fadd (fneg A), B) -> (fsub B, A)
11456 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11457 isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
11458 return DAG.getNode(ISD::FSUB, DL, VT, N1,
11459 GetNegatedExpression(N0, DAG, LegalOperations,
11460 ForCodeSize), Flags);
11462 auto isFMulNegTwo = [](SDValue FMul) {
11463 if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11464 return false;
11465 auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11466 return C && C->isExactlyValue(-2.0);
11467 };
11469 // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11470 if (isFMulNegTwo(N0)) {
11471 SDValue B = N0.getOperand(0);
11472 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11473 return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11474 }
11475 // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11476 if (isFMulNegTwo(N1)) {
11477 SDValue B = N1.getOperand(0);
11478 SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11479 return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11480 }
11482 // No FP constant should be created after legalization as the Instruction
11483 // Selection pass has a hard time dealing with FP constants.
11484 bool AllowNewConst = (Level < AfterLegalizeDAG);
11486 // If 'unsafe math' or nnan is enabled, fold lots of things.
11487 if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11488 // If allowed, fold (fadd (fneg x), x) -> 0.0
11489 if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11490 return DAG.getConstantFP(0.0, DL, VT);
11492 // If allowed, fold (fadd x, (fneg x)) -> 0.0
11493 if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11494 return DAG.getConstantFP(0.0, DL, VT);
11495 }
11497 // If 'unsafe math' or reassoc and nsz, fold lots of things.
11498 // TODO: break out portions of the transformations below for which Unsafe is
11499 // considered and which do not require both nsz and reassoc
11500 if ((Options.UnsafeFPMath ||
11501 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11502 AllowNewConst) {
11503 // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11504 if (N1CFP && N0.getOpcode() == ISD::FADD &&
11505 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11506 SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11507 return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11508 }
11510 // We can fold chains of FADD's of the same value into multiplications.
11511 // This transform is not safe in general because we are reducing the number
11512 // of rounding steps.
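// e.g., (fadd (fadd x, x), x) -> (fmul x, 3.0) rounds once where the original
// expression rounds twice.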
11513 if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11514 if (N0.getOpcode() == ISD::FMUL) {
11515 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11516 bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11518 // (fadd (fmul x, c), x) -> (fmul x, c+1)
11519 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11520 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11521 DAG.getConstantFP(1.0, DL, VT), Flags);
11522 return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11523 }
11525 // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11526 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11527 N1.getOperand(0) == N1.getOperand(1) &&
11528 N0.getOperand(0) == N1.getOperand(0)) {
11529 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11530 DAG.getConstantFP(2.0, DL, VT), Flags);
11531 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
11532 }
11533 }
11535 if (N1.getOpcode() == ISD::FMUL) {
11536 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11537 bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
11539 // (fadd x, (fmul x, c)) -> (fmul x, c+1)
11540 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
11541 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11542 DAG.getConstantFP(1.0, DL, VT), Flags);
11543 return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
11544 }
11546 // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
11547 if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
11548 N0.getOperand(0) == N0.getOperand(1) &&
11549 N1.getOperand(0) == N0.getOperand(0)) {
11550 SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
11551 DAG.getConstantFP(2.0, DL, VT), Flags);
11552 return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
11553 }
11554 }
11556 if (N0.getOpcode() == ISD::FADD) {
11557 bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11558 // (fadd (fadd x, x), x) -> (fmul x, 3.0)
11559 if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
11560 (N0.getOperand(0) == N1)) {
11561 return DAG.getNode(ISD::FMUL, DL, VT,
11562 N1, DAG.getConstantFP(3.0, DL, VT), Flags);
11563 }
11564 }
11566 if (N1.getOpcode() == ISD::FADD) {
11567 bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
11568 // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
11569 if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
11570 N1.getOperand(0) == N0) {
11571 return DAG.getNode(ISD::FMUL, DL, VT,
11572 N0, DAG.getConstantFP(3.0, DL, VT), Flags);
11573 }
11574 }
11576 // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
11577 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
11578 N0.getOperand(0) == N0.getOperand(1) &&
11579 N1.getOperand(0) == N1.getOperand(1) &&
11580 N0.getOperand(0) == N1.getOperand(0)) {
11581 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
11582 DAG.getConstantFP(4.0, DL, VT), Flags);
11583 }
11584 }
11585 } // enable-unsafe-fp-math
11587 // FADD -> FMA combines:
11588 if (SDValue Fused = visitFADDForFMACombine(N)) {
11589 AddToWorklist(Fused.getNode());
11590 return Fused;
11591 }
11592 return SDValue();
11593 }
11595 SDValue DAGCombiner::visitFSUB(SDNode *N) {
11596 SDValue N0 = N->getOperand(0);
11597 SDValue N1 = N->getOperand(1);
11598 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11599 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11600 EVT VT = N->getValueType(0);
11601 SDLoc DL(N);
11602 const TargetOptions &Options = DAG.getTarget().Options;
11603 const SDNodeFlags Flags = N->getFlags();
11605 // fold vector ops
11606 if (VT.isVector())
11607 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11608 return FoldedVOp;
11610 // fold (fsub c1, c2) -> c1-c2
11611 if (N0CFP && N1CFP)
11612 return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
11614 if (SDValue NewSel = foldBinOpIntoSelect(N))
11615 return NewSel;
11617 // (fsub A, 0) -> A
11618 if (N1CFP && N1CFP->isZero()) {
11619 if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
11620 Flags.hasNoSignedZeros()) {
11621 return N0;
11622 }
11623 }
11625 if (N0 == N1) {
11626 // (fsub x, x) -> 0.0
11627 if (Options.UnsafeFPMath || Flags.hasNoNaNs())
11628 return DAG.getConstantFP(0.0f, DL, VT);
11629 }
11631 // (fsub -0.0, N1) -> -N1
11632 // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
11633 // FSUB does not specify the sign bit of a NaN. Also note that for
11634 // the same reason, the inverse transform is not safe, unless fast math
11635 // flags are in play.
11636 if (N0CFP && N0CFP->isZero()) {
11637 if (N0CFP->isNegative() ||
11638 (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
11639 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11640 return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
11641 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11642 return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
11643 }
11644 }
11646 if ((Options.UnsafeFPMath ||
11647 (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
11648 && N1.getOpcode() == ISD::FADD) {
11649 // X - (X + Y) -> -Y
11650 if (N0 == N1->getOperand(0))
11651 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
11652 // X - (Y + X) -> -Y
11653 if (N0 == N1->getOperand(1))
11654 return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
11655 }
11657 // fold (fsub A, (fneg B)) -> (fadd A, B)
11658 if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
11659 return DAG.getNode(ISD::FADD, DL, VT, N0,
11660 GetNegatedExpression(N1, DAG, LegalOperations,
11661 ForCodeSize), Flags);
11663 // FSUB -> FMA combines:
11664 if (SDValue Fused = visitFSUBForFMACombine(N)) {
11665 AddToWorklist(Fused.getNode());
11666 return Fused;
11667 }
11669 return SDValue();
11670 }
11672 SDValue DAGCombiner::visitFMUL(SDNode *N) {
11673 SDValue N0 = N->getOperand(0);
11674 SDValue N1 = N->getOperand(1);
11675 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
11676 ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
11677 EVT VT = N->getValueType(0);
11678 SDLoc DL(N);
11679 const TargetOptions &Options = DAG.getTarget().Options;
11680 const SDNodeFlags Flags = N->getFlags();
11682 // fold vector ops
11683 if (VT.isVector()) {
11684 // This just handles C1 * C2 for vectors. Other vector folds are below.
11685 if (SDValue FoldedVOp = SimplifyVBinOp(N))
11686 return FoldedVOp;
11687 }
11689 // fold (fmul c1, c2) -> c1*c2
11690 if (N0CFP && N1CFP)
11691 return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
11693 // canonicalize constant to RHS
11694 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11695 !isConstantFPBuildVectorOrConstantFP(N1))
11696 return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
11698 // fold (fmul A, 1.0) -> A
11699 if (N1CFP && N1CFP->isExactlyValue(1.0))
11700 return N0;
11702 if (SDValue NewSel = foldBinOpIntoSelect(N))
11703 return NewSel;
11705 if (Options.UnsafeFPMath ||
11706 (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
11707 // fold (fmul A, 0) -> 0
11708 if (N1CFP && N1CFP->isZero())
11709 return N1;
11710 }
11712 if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
11713 // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
11714 if (isConstantFPBuildVectorOrConstantFP(N1) &&
11715 N0.getOpcode() == ISD::FMUL) {
11716 SDValue N00 = N0.getOperand(0);
11717 SDValue N01 = N0.getOperand(1);
11718 // Avoid an infinite loop by making sure that N00 is not a constant
11719 // (the inner multiply has not been constant folded yet).
11720 if (isConstantFPBuildVectorOrConstantFP(N01) &&
11721 !isConstantFPBuildVectorOrConstantFP(N00)) {
11722 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
11723 return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
11724 }
11725 }
11727 // Match a special case: we convert X * 2.0 into fadd.
11728 // fmul (fadd X, X), C -> fmul X, 2.0 * C
11729 if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
11730 N0.getOperand(0) == N0.getOperand(1)) {
11731 const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
11732 SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
11733 return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
11734 }
11735 }
11737 // fold (fmul X, 2.0) -> (fadd X, X)
11738 if (N1CFP && N1CFP->isExactlyValue(+2.0))
11739 return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
11741 // fold (fmul X, -1.0) -> (fneg X)
11742 if (N1CFP && N1CFP->isExactlyValue(-1.0))
11743 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
11744 return DAG.getNode(ISD::FNEG, DL, VT, N0);
11746 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
11747 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
11748 ForCodeSize)) {
11749 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
11750 ForCodeSize)) {
11751 // Both can be negated for free, check to see if at least one is cheaper
11752 // negated.
11753 if (LHSNeg == 2 || RHSNeg == 2)
11754 return DAG.getNode(ISD::FMUL, DL, VT,
11755 GetNegatedExpression(N0, DAG, LegalOperations,
11756 ForCodeSize),
11757 GetNegatedExpression(N1, DAG, LegalOperations,
11758 ForCodeSize),
11759 Flags);
11760 }
11761 }
11763 // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
11764 // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
11765 if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
11766 (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
11767 TLI.isOperationLegal(ISD::FABS, VT)) {
11768 SDValue Select = N0, X = N1;
11769 if (Select.getOpcode() != ISD::SELECT)
11770 std::swap(Select, X);
11772 SDValue Cond = Select.getOperand(0);
11773 auto TrueOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
11774 auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
11776 if (TrueOpnd && FalseOpnd &&
11777 Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
11778 isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
11779 cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
11780 ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
11781 switch (CC) {
11782 default: break;
11783 case ISD::SETOLT:
11784 case ISD::SETULT:
11785 case ISD::SETOLE:
11786 case ISD::SETULE:
11787 case ISD::SETLT:
11788 case ISD::SETLE:
11789 std::swap(TrueOpnd, FalseOpnd);
11790 LLVM_FALLTHROUGH;
11791 case ISD::SETOGT:
11792 case ISD::SETUGT:
11793 case ISD::SETOGE:
11794 case ISD::SETUGE:
11795 case ISD::SETGT:
11796 case ISD::SETGE:
11797 if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
11798 TLI.isOperationLegal(ISD::FNEG, VT))
11799 return DAG.getNode(ISD::FNEG, DL, VT,
11800 DAG.getNode(ISD::FABS, DL, VT, X));
11801 if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
11802 return DAG.getNode(ISD::FABS, DL, VT, X);
11804 break;
11805 }
11806 }
11807 }
11809 // FMUL -> FMA combines:
11810 if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
11811 AddToWorklist(Fused.getNode());
11812 return Fused;
11813 }
11815 return SDValue();
11816 }
11818 SDValue DAGCombiner::visitFMA(SDNode *N) {
11819 SDValue N0 = N->getOperand(0);
11820 SDValue N1 = N->getOperand(1);
11821 SDValue N2 = N->getOperand(2);
11822 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
11823 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
11824 EVT VT = N->getValueType(0);
11825 SDLoc DL(N);
11826 const TargetOptions &Options = DAG.getTarget().Options;
11828 // FMA nodes have flags that propagate to the created nodes.
11829 const SDNodeFlags Flags = N->getFlags();
11830 bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
11832 // Constant fold FMA.
11833 if (isa<ConstantFPSDNode>(N0) &&
11834 isa<ConstantFPSDNode>(N1) &&
11835 isa<ConstantFPSDNode>(N2)) {
11836 return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
11837 }
11839 if (UnsafeFPMath) {
11840 if (N0CFP && N0CFP->isZero())
11841 return N2;
11842 if (N1CFP && N1CFP->isZero())
11843 return N2;
11845 // TODO: The FMA node should have flags that propagate to these nodes.
11846 if (N0CFP && N0CFP->isExactlyValue(1.0))
11847 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
11848 if (N1CFP && N1CFP->isExactlyValue(1.0))
11849 return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
11850 }
11851 // Canonicalize (fma c, x, y) -> (fma x, c, y)
11852 if (isConstantFPBuildVectorOrConstantFP(N0) &&
11853 !isConstantFPBuildVectorOrConstantFP(N1))
11854 return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
11856 if (UnsafeFPMath) {
11857 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
11858 if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
11859 isConstantFPBuildVectorOrConstantFP(N1) &&
11860 isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
11861 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11862 DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
11863 Flags), Flags);
11864 }
11866 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
11867 if (N0.getOpcode() == ISD::FMUL &&
11868 isConstantFPBuildVectorOrConstantFP(N1) &&
11869 isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11870 return DAG.getNode(ISD::FMA, DL, VT,
11871 N0.getOperand(0),
11872 DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
11873 Flags),
11874 N2);
11875 }
11876 }
11878 // (fma x, 1, y) -> (fadd x, y)
11879 // (fma x, -1, y) -> (fadd (fneg x), y)
11880 if (N1CFP) {
11881 if (N1CFP->isExactlyValue(1.0))
11882 // TODO: The FMA node should have flags that propagate to this node.
11883 return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
11885 if (N1CFP->isExactlyValue(-1.0) &&
11886 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
11887 SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
11888 AddToWorklist(RHSNeg.getNode());
11889 // TODO: The FMA node should have flags that propagate to this node.
11890 return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
11891 }
11893 // fma (fneg x), K, y -> fma x, -K, y
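// (Negating the constant is expected to be free here: either ConstantFP is
// legal, or the single-use immediate has to be materialized anyway.)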
11894 if (N0.getOpcode() == ISD::FNEG &&
11895 (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
11896 (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
11897 ForCodeSize)))) {
11898 return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
11899 DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
11900 }
11901 }
11903 if (UnsafeFPMath) {
11904 // (fma x, c, x) -> (fmul x, (c+1))
11905 if (N1CFP && N0 == N2) {
11906 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11907 DAG.getNode(ISD::FADD, DL, VT, N1,
11908 DAG.getConstantFP(1.0, DL, VT), Flags),
11909 Flags);
11910 }
11912 // (fma x, c, (fneg x)) -> (fmul x, (c-1))
11913 if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
11914 return DAG.getNode(ISD::FMUL, DL, VT, N0,
11915 DAG.getNode(ISD::FADD, DL, VT, N1,
11916 DAG.getConstantFP(-1.0, DL, VT), Flags),
11917 Flags);
11918 }
11919 }
11921 return SDValue();
11922 }
11924 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11925 // reciprocal.
11926 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
11927 // Notice that this is not always beneficial. One reason is different targets
11928 // may have different costs for FDIV and FMUL, so sometimes the cost of two
11929 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
11930 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
11931 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
11932 // TODO: Limit this transform based on optsize/minsize - it always creates at
11933 // least 1 extra instruction. But the perf win may be substantial enough
11934 // that only minsize should restrict this.
11935 bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
11936 const SDNodeFlags Flags = N->getFlags();
11937 if (!UnsafeMath && !Flags.hasAllowReciprocal())
11938 return SDValue();
11940 // Skip if current node is a reciprocal.
11941 SDValue N0 = N->getOperand(0);
11942 ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
11943 if (N0CFP && N0CFP->isExactlyValue(1.0))
11944 return SDValue();
11946 // Exit early if the target does not want this transform or if there can't
11947 // possibly be enough uses of the divisor to make the transform worthwhile.
11948 SDValue N1 = N->getOperand(1);
11949 unsigned MinUses = TLI.combineRepeatedFPDivisors();
11951 // For splat vectors, scale the number of uses by the splat factor. If we can
11952 // convert the division into a scalar op, that will likely be much faster.
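// e.g., a single v4f32 FDIV by a splatted divisor counts as four uses here.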
11953 unsigned NumElts = 1;
11954 EVT VT = N->getValueType(0);
11955 if (VT.isVector() && DAG.isSplatValue(N1))
11956 NumElts = VT.getVectorNumElements();
11958 if (!MinUses || (N1->use_size() * NumElts) < MinUses)
11959 return SDValue();
11961 // Find all FDIV users of the same divisor.
11962 // Use a set because duplicates may be present in the user list.
11963 SetVector<SDNode *> Users;
11964 for (auto *U : N1->uses()) {
11965 if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
11966 // This division is eligible for optimization only if global unsafe math
11967 // is enabled or if this division allows reciprocal formation.
11968 if (UnsafeMath || U->getFlags().hasAllowReciprocal())
11969 Users.insert(U);
11970 }
11971 }
11973 // Now that we have the actual number of divisor uses, make sure it meets
11974 // the minimum threshold specified by the target.
11975 if ((Users.size() * NumElts) < MinUses)
11976 return SDValue();
11978 SDLoc DL(N);
11979 SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
11980 SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
11982 // Dividend / Divisor -> Dividend * Reciprocal
11983 for (auto *U : Users) {
11984 SDValue Dividend = U->getOperand(0);
11985 if (Dividend != FPOne) {
11986 SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
11987 Reciprocal, Flags);
11988 CombineTo(U, NewNode);
11989 } else if (U != Reciprocal.getNode()) {
11990 // In the absence of fast-math-flags, this user node is always the
11991 // same node as Reciprocal, but with FMF they may be different nodes.
11992 CombineTo(U, Reciprocal);
11993 }
11994 }
11995 return SDValue(N, 0); // N was replaced.
11996 }
11998 SDValue DAGCombiner::visitFDIV(SDNode *N) {
11999 SDValue N0 = N->getOperand(0);
12000 SDValue N1 = N->getOperand(1);
12001 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12002 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12003 EVT VT = N->getValueType(0);
12004 SDLoc DL(N);
12005 const TargetOptions &Options = DAG.getTarget().Options;
12006 SDNodeFlags Flags = N->getFlags();
12008 // fold vector ops
12009 if (VT.isVector())
12010 if (SDValue FoldedVOp = SimplifyVBinOp(N))
12011 return FoldedVOp;
12013 // fold (fdiv c1, c2) -> c1/c2
12014 if (N0CFP && N1CFP)
12015 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12017 if (SDValue NewSel = foldBinOpIntoSelect(N))
12018 return NewSel;
12020 if (SDValue V = combineRepeatedFPDivisors(N))
12021 return V;
12023 if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12024 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12025 if (N1CFP) {
12026 // Compute the reciprocal 1.0 / c2.
12027 const APFloat &N1APF = N1CFP->getValueAPF();
12028 APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12029 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12030 // Only do the transform if the reciprocal is a legal fp immediate that
12031 // isn't too nasty (e.g., NaN, denormal, ...).
12032 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12033 (!LegalOperations ||
12034 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12035 // backend)... we should handle this gracefully after Legalize.
12036 // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12037 TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12038 TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12039 return DAG.getNode(ISD::FMUL, DL, VT, N0,
12040 DAG.getConstantFP(Recip, DL, VT), Flags);
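// For example, (fdiv X, 2.0) becomes (fmul X, 0.5), since 0.5 is exact; an
// inexact reciprocal such as 1.0/3.0 also qualifies (opInexact is accepted
// above), while one that underflows to a denormal is rejected.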
12043 // If this FDIV is part of a reciprocal square root, it may be folded
12044 // into a target-specific square root estimate instruction.
12045 if (N1.getOpcode() == ISD::FSQRT) {
12046 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
12047 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12049 } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12050 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12051 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12052 Flags)) {
12053 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12054 AddToWorklist(RV.getNode());
12055 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12057 } else if (N1.getOpcode() == ISD::FP_ROUND &&
12058 N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12059 if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12060 Flags)) {
12061 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12062 AddToWorklist(RV.getNode());
12063 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12065 } else if (N1.getOpcode() == ISD::FMUL) {
12066 // Look through an FMUL. Even though this won't remove the FDIV directly,
12067 // it's still worthwhile to get rid of the FSQRT if possible.
12068 SDValue SqrtOp;
12069 SDValue OtherOp;
12070 if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12071 SqrtOp = N1.getOperand(0);
12072 OtherOp = N1.getOperand(1);
12073 } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12074 SqrtOp = N1.getOperand(1);
12075 OtherOp = N1.getOperand(0);
12077 if (SqrtOp.getNode()) {
12078 // We found an FSQRT, so try to make this fold:
12079 // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12080 if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12081 RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12082 AddToWorklist(RV.getNode());
12083 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12088 // Fold into a reciprocal estimate and multiply instead of a real divide.
12089 if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12090 AddToWorklist(RV.getNode());
12091 return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12095 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12096 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12097 ForCodeSize)) {
12098 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12099 ForCodeSize)) {
12100 // Both can be negated for free; check whether at least one is cheaper
12101 // when negated.
12102 if (LHSNeg == 2 || RHSNeg == 2)
12103 return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12104 GetNegatedExpression(N0, DAG, LegalOperations,
12105 ForCodeSize),
12106 GetNegatedExpression(N1, DAG, LegalOperations,
12107 ForCodeSize),
12108 Flags);
12112 return SDValue();
12115 SDValue DAGCombiner::visitFREM(SDNode *N) {
12116 SDValue N0 = N->getOperand(0);
12117 SDValue N1 = N->getOperand(1);
12118 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12119 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12120 EVT VT = N->getValueType(0);
12122 // fold (frem c1, c2) -> fmod(c1,c2)
12123 if (N0CFP && N1CFP)
12124 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12126 if (SDValue NewSel = foldBinOpIntoSelect(N))
12127 return NewSel;
12129 return SDValue();
12132 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12133 SDNodeFlags Flags = N->getFlags();
12134 if (!DAG.getTarget().Options.UnsafeFPMath &&
12135 !Flags.hasApproximateFuncs())
12136 return SDValue();
12138 SDValue N0 = N->getOperand(0);
12139 if (TLI.isFsqrtCheap(N0, DAG))
12140 return SDValue();
12142 // FSQRT nodes have flags that propagate to the created nodes.
12143 return buildSqrtEstimate(N0, Flags);
12146 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12147 /// copysign(x, fp_round(y)) -> copysign(x, y)
12148 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12149 SDValue N1 = N->getOperand(1);
12150 if ((N1.getOpcode() == ISD::FP_EXTEND ||
12151 N1.getOpcode() == ISD::FP_ROUND)) {
12152 // Do not optimize out the type conversion of f128 values yet.
12153 // For some targets like x86_64, the configuration keeps one f128
12154 // value in one SSE register, but instruction selection cannot handle
12155 // FCOPYSIGN on SSE registers yet.
12156 EVT N1VT = N1->getValueType(0);
12157 EVT N1Op0VT = N1->getOperand(0).getValueType();
12158 return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12160 return false;
12163 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12164 SDValue N0 = N->getOperand(0);
12165 SDValue N1 = N->getOperand(1);
12166 bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12167 bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12168 EVT VT = N->getValueType(0);
12170 if (N0CFP && N1CFP) // Constant fold
12171 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12173 if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12174 const APFloat &V = N1C->getValueAPF();
12175 // copysign(x, c1) -> fabs(x) iff ispos(c1)
12176 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12177 if (!V.isNegative()) {
12178 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12179 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12180 } else {
12181 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12182 return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12183 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12187 // copysign(fabs(x), y) -> copysign(x, y)
12188 // copysign(fneg(x), y) -> copysign(x, y)
12189 // copysign(copysign(x,z), y) -> copysign(x, y)
12190 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12191 N0.getOpcode() == ISD::FCOPYSIGN)
12192 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12194 // copysign(x, abs(y)) -> abs(x)
12195 if (N1.getOpcode() == ISD::FABS)
12196 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12198 // copysign(x, copysign(y,z)) -> copysign(x, z)
12199 if (N1.getOpcode() == ISD::FCOPYSIGN)
12200 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12202 // copysign(x, fp_extend(y)) -> copysign(x, y)
12203 // copysign(x, fp_round(y)) -> copysign(x, y)
12204 if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12205 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12207 return SDValue();
12210 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12211 ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12212 if (!ExponentC)
12213 return SDValue();
12215 // Try to convert x ** (1/3) into cube root.
12216 // TODO: Handle the various flavors of long double.
12217 // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12218 // Some range near 1/3 should be fine.
12219 EVT VT = N->getValueType(0);
12220 if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12221 (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12222 // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12223 // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12224 // pow(-val, 1/3) = nan; cbrt(-val) = -cbrt(val).
12225 // For regular numbers, rounding may cause the results to differ.
12226 // Therefore, we require { nsz ninf nnan afn } for this transform.
12227 // TODO: We could select out the special cases if we don't have nsz/ninf.
12228 SDNodeFlags Flags = N->getFlags();
12229 if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12230 !Flags.hasApproximateFuncs())
12231 return SDValue();
12233 // Do not create a cbrt() libcall if the target does not have it, and do not
12234 // turn a pow that has lowering support into a cbrt() libcall.
12235 if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12236 (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12237 DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12238 return SDValue();
12240 return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12243 // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12244 // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12245 // TODO: This could be extended (using a target hook) to handle smaller
12246 // power-of-2 fractional exponents.
12247 bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12248 bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12249 if (ExponentIs025 || ExponentIs075) {
12250 // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12251 // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
12252 // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12253 // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
12254 // For regular numbers, rounding may cause the results to differ.
12255 // Therefore, we require { nsz ninf afn } for this transform.
12256 // TODO: We could select out the special cases if we don't have nsz/ninf.
12257 SDNodeFlags Flags = N->getFlags();
12259 // We only need no signed zeros for the 0.25 case.
12260 if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12261 !Flags.hasApproximateFuncs())
12262 return SDValue();
12264 // Don't double the number of libcalls. We are trying to inline fast code.
12265 if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12266 return SDValue();
12268 // Assume that libcalls are the smallest code.
12269 // TODO: This restriction should probably be lifted for vectors.
12270 if (DAG.getMachineFunction().getFunction().hasOptSize())
12271 return SDValue();
12273 // pow(X, 0.25) --> sqrt(sqrt(X))
12274 SDLoc DL(N);
12275 SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12276 SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12277 if (ExponentIs025)
12278 return SqrtSqrt;
12279 // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12280 return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12283 return SDValue();
12286 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12287 const TargetLowering &TLI) {
12288 // This optimization is guarded by a function attribute because it may produce
12289 // unexpected results. I.e., programs may be relying on the platform-specific
12290 // undefined behavior when the float-to-int conversion overflows.
12291 const Function &F = DAG.getMachineFunction().getFunction();
12292 Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12293 if (StrictOverflow.getValueAsString().equals("false"))
12294 return SDValue();
12296 // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12297 // replacing casts with a libcall. We also must be allowed to ignore -0.0
12298 // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12299 // conversions would return +0.0.
12300 // FIXME: We should be able to use node-level FMF here.
12301 // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12302 EVT VT = N->getValueType(0);
12303 if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12304 !DAG.getTarget().Options.NoSignedZerosFPMath)
12305 return SDValue();
12307 // fptosi/fptoui round towards zero, so converting from FP to integer and
12308 // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12309 SDValue N0 = N->getOperand(0);
12310 if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12311 N0.getOperand(0).getValueType() == VT)
12312 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12314 if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12315 N0.getOperand(0).getValueType() == VT)
12316 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12318 return SDValue();
12321 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12322 SDValue N0 = N->getOperand(0);
12323 EVT VT = N->getValueType(0);
12324 EVT OpVT = N0.getValueType();
12326 // fold (sint_to_fp c1) -> c1fp
12327 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12328 // ...but only if the target supports immediate floating-point values
12329 (!LegalOperations ||
12330 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12331 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12333 // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12334 // but UINT_TO_FP is legal on this target, try to convert.
12335 if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12336 hasOperation(ISD::UINT_TO_FP, OpVT)) {
12337 // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12338 if (DAG.SignBitIsZero(N0))
12339 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12342 // The next optimizations are desirable only if SELECT_CC can be lowered.
12343 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12344 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
12345 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12346 !VT.isVector() &&
12347 (!LegalOperations ||
12348 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12349 SDLoc DL(N);
12350 SDValue Ops[] =
12351 { N0.getOperand(0), N0.getOperand(1),
12352 DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12353 N0.getOperand(2) };
12354 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12357 // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12358 // (select_cc x, y, 1.0, 0.0, cc)
12359 if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12360 N0.getOperand(0).getOpcode() == ISD::SETCC && !VT.isVector() &&
12361 (!LegalOperations ||
12362 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12363 SDLoc DL(N);
12364 SDValue Ops[] =
12365 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12366 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12367 N0.getOperand(0).getOperand(2) };
12368 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12372 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12373 return FTrunc;
12375 return SDValue();
12378 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12379 SDValue N0 = N->getOperand(0);
12380 EVT VT = N->getValueType(0);
12381 EVT OpVT = N0.getValueType();
12383 // fold (uint_to_fp c1) -> c1fp
12384 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12385 // ...but only if the target supports immediate floating-point values
12386 (!LegalOperations ||
12387 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12388 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12390 // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12391 // but SINT_TO_FP is legal on this target, try to convert.
12392 if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12393 hasOperation(ISD::SINT_TO_FP, OpVT)) {
12394 // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12395 if (DAG.SignBitIsZero(N0))
12396 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12399 // The next optimizations are desirable only if SELECT_CC can be lowered.
12400 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12401 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
12402 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12403 (!LegalOperations ||
12404 TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12405 SDLoc DL(N);
12406 SDValue Ops[] =
12407 { N0.getOperand(0), N0.getOperand(1),
12408 DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12409 N0.getOperand(2) };
12410 return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12414 if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12415 return FTrunc;
12417 return SDValue();
12420 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
12421 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12422 SDValue N0 = N->getOperand(0);
12423 EVT VT = N->getValueType(0);
12425 if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12426 return SDValue();
12428 SDValue Src = N0.getOperand(0);
12429 EVT SrcVT = Src.getValueType();
12430 bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12431 bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12433 // We can safely assume the conversion won't overflow the output range,
12434 // because (for example) (uint8_t)18293.f is undefined behavior.
12436 // Since we can assume the conversion won't overflow, our decision as to
12437 // whether the input will fit in the float should depend on the minimum
12438 // of the input range and output range.
12440 // This means this is also safe for a signed input and unsigned output, since
12441 // a negative input would lead to undefined behavior.
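// Worked example (a sketch): in the computation below, a signed
// i16 -> f32 -> i16 round trip has InputSize = OutputSize = 15, and f32
// provides 24 bits of precision, so the trip is exact and folds away; a
// signed i32 -> f32 -> i32 round trip has ActualSize = 31 > 24, so we bail.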
12442 unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12443 unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12444 unsigned ActualSize = std::min(InputSize, OutputSize);
12445 const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12447 // We can only fold away the float conversion if the input range can be
12448 // represented exactly in the float range.
12449 if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12450 if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12451 unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12452 : ISD::ZERO_EXTEND;
12453 return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12455 if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12456 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12457 return DAG.getBitcast(VT, Src);
12459 return SDValue();
12462 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12463 SDValue N0 = N->getOperand(0);
12464 EVT VT = N->getValueType(0);
12466 // fold (fp_to_sint c1fp) -> c1
12467 if (isConstantFPBuildVectorOrConstantFP(N0))
12468 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12470 return FoldIntToFPToInt(N, DAG);
12473 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12474 SDValue N0 = N->getOperand(0);
12475 EVT VT = N->getValueType(0);
12477 // fold (fp_to_uint c1fp) -> c1
12478 if (isConstantFPBuildVectorOrConstantFP(N0))
12479 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12481 return FoldIntToFPToInt(N, DAG);
12484 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12485 SDValue N0 = N->getOperand(0);
12486 SDValue N1 = N->getOperand(1);
12487 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12488 EVT VT = N->getValueType(0);
12490 // fold (fp_round c1fp) -> c1fp
12491 if (N0CFP)
12492 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12494 // fold (fp_round (fp_extend x)) -> x
12495 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12496 return N0.getOperand(0);
12498 // fold (fp_round (fp_round x)) -> (fp_round x)
12499 if (N0.getOpcode() == ISD::FP_ROUND) {
12500 const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12501 const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12503 // Skip this folding if it results in an fp_round from f80 to f16.
12505 // f80 to f16 always generates an expensive (and as yet, unimplemented)
12506 // libcall to __truncxfhf2 instead of selecting native f16 conversion
12507 // instructions from f32 or f64. Moreover, the first (value-preserving)
12508 // fp_round from f80 to either f32 or f64 may become a NOP on platforms like
12509 // x86.
12510 if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12511 return SDValue();
12513 // If the first fp_round isn't a value-preserving truncation, it might
12514 // introduce a tie in the second fp_round, which wouldn't occur in the
12515 // single-step fp_round we want to fold to.
12516 // In other words, double rounding isn't the same as rounding.
12517 // Also, this is a value-preserving truncation iff both fp_rounds are.
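// Illustrative example: a value that the first (inexact) fp_round nudges
// exactly onto a tie point of the narrower type can be rounded up again by
// the second fp_round, while a single direct rounding would round down.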
12518 if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
12519 SDLoc DL(N);
12520 return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
12521 DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
12525 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
12526 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
12527 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
12528 N0.getOperand(0), N1);
12529 AddToWorklist(Tmp.getNode());
12530 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
12531 Tmp, N0.getOperand(1));
12534 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12535 return NewVSel;
12537 return SDValue();
12540 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
12541 SDValue N0 = N->getOperand(0);
12542 EVT VT = N->getValueType(0);
12543 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
12544 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12546 // fold (fp_round_inreg c1fp) -> c1fp
12547 if (N0CFP && isTypeLegal(EVT)) {
12548 SDLoc DL(N);
12549 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
12550 return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
12553 return SDValue();
12556 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
12557 SDValue N0 = N->getOperand(0);
12558 EVT VT = N->getValueType(0);
12560 // If this is fp_round(fpextend), don't fold it; allow ourselves to be folded.
12561 if (N->hasOneUse() &&
12562 N->use_begin()->getOpcode() == ISD::FP_ROUND)
12563 return SDValue();
12565 // fold (fp_extend c1fp) -> c1fp
12566 if (isConstantFPBuildVectorOrConstantFP(N0))
12567 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
12569 // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
12570 if (N0.getOpcode() == ISD::FP16_TO_FP &&
12571 TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
12572 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
12574 // Turn fp_extend(fp_round(X, 1)) -> X since the fp_round doesn't affect the
12575 // value of X.
12576 if (N0.getOpcode() == ISD::FP_ROUND
12577 && N0.getConstantOperandVal(1) == 1) {
12578 SDValue In = N0.getOperand(0);
12579 if (In.getValueType() == VT) return In;
12580 if (VT.bitsLT(In.getValueType()))
12581 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
12582 In, N0.getOperand(1));
12583 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
12586 // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
12587 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
12588 TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
12589 LoadSDNode *LN0 = cast<LoadSDNode>(N0);
12590 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
12591 LN0->getChain(),
12592 LN0->getBasePtr(), N0.getValueType(),
12593 LN0->getMemOperand());
12594 CombineTo(N, ExtLoad);
12595 CombineTo(N0.getNode(),
12596 DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
12597 N0.getValueType(), ExtLoad,
12598 DAG.getIntPtrConstant(1, SDLoc(N0))),
12599 ExtLoad.getValue(1));
12600 return SDValue(N, 0); // Return N so it doesn't get rechecked!
12603 if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
12604 return NewVSel;
12606 return SDValue();
12609 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
12610 SDValue N0 = N->getOperand(0);
12611 EVT VT = N->getValueType(0);
12613 // fold (fceil c1) -> fceil(c1)
12614 if (isConstantFPBuildVectorOrConstantFP(N0))
12615 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
12617 return SDValue();
12620 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
12621 SDValue N0 = N->getOperand(0);
12622 EVT VT = N->getValueType(0);
12624 // fold (ftrunc c1) -> ftrunc(c1)
12625 if (isConstantFPBuildVectorOrConstantFP(N0))
12626 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
12628 // fold ftrunc (known rounded int x) -> x
12629 // ftrunc is a part of the fptosi/fptoui expansion on some targets, so it is
12630 // likely to be generated to extract an integer from a rounded floating value.
12631 switch (N0.getOpcode()) {
12632 default: break;
12633 case ISD::FRINT:
12634 case ISD::FTRUNC:
12635 case ISD::FNEARBYINT:
12636 case ISD::FFLOOR:
12637 case ISD::FCEIL:
12638 return N0;
12641 return SDValue();
12644 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
12645 SDValue N0 = N->getOperand(0);
12646 EVT VT = N->getValueType(0);
12648 // fold (ffloor c1) -> ffloor(c1)
12649 if (isConstantFPBuildVectorOrConstantFP(N0))
12650 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
12652 return SDValue();
12655 // FIXME: FNEG and FABS have a lot in common; refactor.
12656 SDValue DAGCombiner::visitFNEG(SDNode *N) {
12657 SDValue N0 = N->getOperand(0);
12658 EVT VT = N->getValueType(0);
12660 // Constant fold FNEG.
12661 if (isConstantFPBuildVectorOrConstantFP(N0))
12662 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
12664 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
12665 &DAG.getTarget().Options, ForCodeSize))
12666 return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12668 // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
12669 // constant pool values.
12670 if (!TLI.isFNegFree(VT) &&
12671 N0.getOpcode() == ISD::BITCAST &&
12672 N0.getNode()->hasOneUse()) {
12673 SDValue Int = N0.getOperand(0);
12674 EVT IntVT = Int.getValueType();
12675 if (IntVT.isInteger() && !IntVT.isVector()) {
12676 APInt SignMask;
12677 if (N0.getValueType().isVector()) {
12678 // For a vector, get a mask such as 0x80... per scalar element
12679 // and splat it.
12680 SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
12681 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12682 } else {
12683 // For a scalar, just generate 0x80...
12684 SignMask = APInt::getSignMask(IntVT.getSizeInBits());
12686 SDLoc DL0(N0);
12687 Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
12688 DAG.getConstant(SignMask, DL0, IntVT));
12689 AddToWorklist(Int.getNode());
12690 return DAG.getBitcast(VT, Int);
12694 // (fneg (fmul c, x)) -> (fmul -c, x)
12695 if (N0.getOpcode() == ISD::FMUL &&
12696 (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
12697 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
12698 if (CFP1) {
12699 APFloat CVal = CFP1->getValueAPF();
12700 CVal.changeSign();
12701 if (Level >= AfterLegalizeDAG &&
12702 (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
12703 TLI.isOperationLegal(ISD::ConstantFP, VT)))
12704 return DAG.getNode(
12705 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
12706 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
12707 N0->getFlags());
12711 return SDValue();
12714 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
12715 APFloat (*Op)(const APFloat &, const APFloat &)) {
12716 SDValue N0 = N->getOperand(0);
12717 SDValue N1 = N->getOperand(1);
12718 EVT VT = N->getValueType(0);
12719 const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
12720 const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
12722 if (N0CFP && N1CFP) {
12723 const APFloat &C0 = N0CFP->getValueAPF();
12724 const APFloat &C1 = N1CFP->getValueAPF();
12725 return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
12728 // Canonicalize to constant on RHS.
12729 if (isConstantFPBuildVectorOrConstantFP(N0) &&
12730 !isConstantFPBuildVectorOrConstantFP(N1))
12731 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
12733 return SDValue();
12736 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
12737 return visitFMinMax(DAG, N, minnum);
12740 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
12741 return visitFMinMax(DAG, N, maxnum);
12744 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
12745 return visitFMinMax(DAG, N, minimum);
12748 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
12749 return visitFMinMax(DAG, N, maximum);
12752 SDValue DAGCombiner::visitFABS(SDNode *N) {
12753 SDValue N0 = N->getOperand(0);
12754 EVT VT = N->getValueType(0);
12756 // fold (fabs c1) -> fabs(c1)
12757 if (isConstantFPBuildVectorOrConstantFP(N0))
12758 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12760 // fold (fabs (fabs x)) -> (fabs x)
12761 if (N0.getOpcode() == ISD::FABS)
12762 return N->getOperand(0);
12764 // fold (fabs (fneg x)) -> (fabs x)
12765 // fold (fabs (fcopysign x, y)) -> (fabs x)
12766 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
12767 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
12769 // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
12770 if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
12771 SDValue Int = N0.getOperand(0);
12772 EVT IntVT = Int.getValueType();
12773 if (IntVT.isInteger() && !IntVT.isVector()) {
12774 APInt SignMask;
12775 if (N0.getValueType().isVector()) {
12776 // For a vector, get a mask such as 0x7f... per scalar element
12777 // and splat it.
12778 SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
12779 SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
12780 } else {
12781 // For a scalar, just generate 0x7f...
12782 SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
12784 SDLoc DL(N0);
12785 Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
12786 DAG.getConstant(SignMask, DL, IntVT));
12787 AddToWorklist(Int.getNode());
12788 return DAG.getBitcast(N->getValueType(0), Int);
12792 return SDValue();
12795 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
12796 SDValue Chain = N->getOperand(0);
12797 SDValue N1 = N->getOperand(1);
12798 SDValue N2 = N->getOperand(2);
12800 // If N is a constant we could fold this into a fallthrough or unconditional
12801 // branch. However that doesn't happen very often in normal code, because
12802 // Instcombine/SimplifyCFG should have handled the available opportunities.
12803 // If we did this folding here, it would be necessary to update the
12804 // MachineBasicBlock CFG, which is awkward.
12806 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
12807 // on the target.
12808 if (N1.getOpcode() == ISD::SETCC &&
12809 TLI.isOperationLegalOrCustom(ISD::BR_CC,
12810 N1.getOperand(0).getValueType())) {
12811 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12812 Chain, N1.getOperand(2),
12813 N1.getOperand(0), N1.getOperand(1), N2);
12816 if (N1.hasOneUse()) {
12817 if (SDValue NewN1 = rebuildSetCC(N1))
12818 return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
12821 return SDValue();
12824 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
12825 if (N.getOpcode() == ISD::SRL ||
12826 (N.getOpcode() == ISD::TRUNCATE &&
12827 (N.getOperand(0).hasOneUse() &&
12828 N.getOperand(0).getOpcode() == ISD::SRL))) {
12829 // Look past the truncate.
12830 if (N.getOpcode() == ISD::TRUNCATE)
12831 N = N.getOperand(0);
12833 // Match this pattern so that we can generate simpler code:
12835 // %a = ...
12836 // %b = and i32 %a, 2
12837 // %c = srl i32 %b, 1
12838 // brcond i32 %c ...
12840 // into
12842 // %a = ...
12843 // %b = and i32 %a, 2
12844 // %c = setcc eq %b, 0
12845 // brcond %c ...
12847 // This applies only when the AND constant value has one bit set and the
12848 // SRL constant is equal to the log2 of the AND constant. The back-end is
12849 // smart enough to convert the result into a TEST/JMP sequence.
12850 SDValue Op0 = N.getOperand(0);
12851 SDValue Op1 = N.getOperand(1);
12853 if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
12854 SDValue AndOp1 = Op0.getOperand(1);
12856 if (AndOp1.getOpcode() == ISD::Constant) {
12857 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
12859 if (AndConst.isPowerOf2() &&
12860 cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
12861 SDLoc DL(N);
12862 return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
12863 Op0, DAG.getConstant(0, DL, Op0.getValueType()),
12864 ISD::SETNE);
12870 // Transform br(xor(x, y)) -> br(x != y)
12871 // Transform br(xor(xor(x,y), 1)) -> br (x == y)
12872 if (N.getOpcode() == ISD::XOR) {
12873 // Because we may call this on a speculatively constructed
12874 // SimplifiedSetCC Node, we need to simplify this node first.
12875 // Ideally this should be folded into SimplifySetCC and not
12876 // here. For now, grab a handle to N so we don't lose it from
12877 // replacements internal to the visit.
12878 HandleSDNode XORHandle(N);
12879 while (N.getOpcode() == ISD::XOR) {
12880 SDValue Tmp = visitXOR(N.getNode());
12881 // No simplification done.
12882 if (!Tmp.getNode())
12883 break;
12884 // Returning N is a form of in-visit replacement that may invalidate
12885 // N. Grab the value from the handle.
12886 if (Tmp.getNode() == N.getNode())
12887 N = XORHandle.getValue();
12888 else // Node simplified. Try simplifying again.
12889 N = Tmp;
12892 if (N.getOpcode() != ISD::XOR)
12893 return N;
12895 SDNode *TheXor = N.getNode();
12897 SDValue Op0 = TheXor->getOperand(0);
12898 SDValue Op1 = TheXor->getOperand(1);
12900 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
12901 bool Equal = false;
12902 if (isOneConstant(Op0) && Op0.hasOneUse() &&
12903 Op0.getOpcode() == ISD::XOR) {
12904 TheXor = Op0.getNode();
12905 Equal = true;
12908 EVT SetCCVT = N.getValueType();
12909 if (LegalTypes)
12910 SetCCVT = getSetCCResultType(SetCCVT);
12911 // Replace the uses of XOR with SETCC
12912 return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
12913 Equal ? ISD::SETEQ : ISD::SETNE);
12917 return SDValue();
12920 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
12922 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
12923 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
12924 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
12926 // If N is a constant we could fold this into a fallthrough or unconditional
12927 // branch. However that doesn't happen very often in normal code, because
12928 // Instcombine/SimplifyCFG should have handled the available opportunities.
12929 // If we did this folding here, it would be necessary to update the
12930 // MachineBasicBlock CFG, which is awkward.
12932 // Use SimplifySetCC to simplify SETCC's.
12933 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
12934 CondLHS, CondRHS, CC->get(), SDLoc(N),
12935 false);
12936 if (Simp.getNode()) AddToWorklist(Simp.getNode());
12938 // fold to a simpler setcc
12939 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
12940 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
12941 N->getOperand(0), Simp.getOperand(2),
12942 Simp.getOperand(0), Simp.getOperand(1),
12943 N->getOperand(4));
12945 return SDValue();
12948 /// Return true if 'Use' is a load or a store that uses N as its base pointer
12949 /// and that N may be folded in the load / store addressing mode.
12950 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
12951 SelectionDAG &DAG,
12952 const TargetLowering &TLI) {
12953 EVT VT;
12954 unsigned AS;
12956 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
12957 if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
12958 return false;
12959 VT = LD->getMemoryVT();
12960 AS = LD->getAddressSpace();
12961 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
12962 if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
12963 return false;
12964 VT = ST->getMemoryVT();
12965 AS = ST->getAddressSpace();
12966 } else
12967 return false;
12969 TargetLowering::AddrMode AM;
12970 if (N->getOpcode() == ISD::ADD) {
12971 AM.HasBaseReg = true;
12972 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12973 if (Offset)
12974 // [reg +/- imm]
12975 AM.BaseOffs = Offset->getSExtValue();
12976 else
12977 // [reg +/- reg]
12978 AM.Scale = 1;
12979 } else if (N->getOpcode() == ISD::SUB) {
12980 AM.HasBaseReg = true;
12981 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
12982 if (Offset)
12983 // [reg +/- imm]
12984 AM.BaseOffs = -Offset->getSExtValue();
12985 else
12986 // [reg +/- reg]
12987 AM.Scale = 1;
12988 } else
12989 return false;
12991 return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
12992 VT.getTypeForEVT(*DAG.getContext()), AS);
12995 /// Try turning a load/store into a pre-indexed load/store when the base
12996 /// pointer is an add or subtract and it has other uses besides the load/store.
12997 /// After the transformation, the new indexed load/store has effectively folded
12998 /// the add/subtract in and all of its other uses are redirected to the
12999 /// new load/store.
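/// For instance (assuming an ARM/AArch64-like target with pre-indexed
/// addressing), "add x1, x1, #8" followed by "ldr x0, [x1]" can become the
/// single pre-indexed load "ldr x0, [x1, #8]!", which produces both the
/// loaded value and the updated pointer.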
13000 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13001 if (Level < AfterLegalizeDAG)
13002 return false;
13004 bool isLoad = true;
13005 SDValue Ptr;
13006 EVT VT;
13007 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13008 if (LD->isIndexed())
13009 return false;
13010 VT = LD->getMemoryVT();
13011 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13012 !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13013 return false;
13014 Ptr = LD->getBasePtr();
13015 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13016 if (ST->isIndexed())
13017 return false;
13018 VT = ST->getMemoryVT();
13019 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13020 !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13021 return false;
13022 Ptr = ST->getBasePtr();
13023 isLoad = false;
13024 } else {
13025 return false;
13028 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13029 // out. There is no reason to make this a preinc/predec.
13030 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13031 Ptr.getNode()->hasOneUse())
13032 return false;
13034 // Ask the target to do addressing mode selection.
13035 SDValue BasePtr;
13036 SDValue Offset;
13037 ISD::MemIndexedMode AM = ISD::UNINDEXED;
13038 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13039 return false;
13041 // Backends without true r+i pre-indexed forms may need to pass a
13042 // constant base with a variable offset so that constant coercion
13043 // will work with the patterns in canonical form.
13044 bool Swapped = false;
13045 if (isa<ConstantSDNode>(BasePtr)) {
13046 std::swap(BasePtr, Offset);
13047 Swapped = true;
13051 // Don't create an indexed load / store with zero offset.
13051 if (isNullConstant(Offset))
13052 return false;
13054 // Try turning it into a pre-indexed load / store except when:
13055 // 1) The new base ptr is a frame index.
13056 // 2) If N is a store and the new base ptr is either the same as or is a
13057 // predecessor of the value being stored.
13058 // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13059 // that would create a cycle.
13060 // 4) All uses are load / store ops that use it as old base ptr.
13062 // Check #1. Preinc'ing a frame index would require copying the stack pointer
13063 // (plus the implicit offset) to a register to preinc anyway.
13064 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13065 return false;
13067 // Check #2.
13068 if (!isLoad) {
13069 SDValue Val = cast<StoreSDNode>(N)->getValue();
13071 // Would require a copy.
13072 if (Val == BasePtr)
13073 return false;
13075 // Would create a cycle.
13076 if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13077 return false;
13080 // Caches for hasPredecessorHelper.
13081 SmallPtrSet<const SDNode *, 32> Visited;
13082 SmallVector<const SDNode *, 16> Worklist;
13083 Worklist.push_back(N);
13085 // If the offset is a constant, there may be other adds of constants that
13086 // can be folded with this one. We should do this to avoid having to keep
13087 // a copy of the original base pointer.
13088 SmallVector<SDNode *, 16> OtherUses;
13089 if (isa<ConstantSDNode>(Offset))
13090 for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13091 UE = BasePtr.getNode()->use_end();
13092 UI != UE; ++UI) {
13093 SDUse &Use = UI.getUse();
13094 // Skip the use that is Ptr and uses of other results from BasePtr's
13095 // node (important for nodes that return multiple results).
13096 if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13097 continue;
13099 if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13100 continue;
13102 if (Use.getUser()->getOpcode() != ISD::ADD &&
13103 Use.getUser()->getOpcode() != ISD::SUB) {
13104 OtherUses.clear();
13105 break;
13108 SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13109 if (!isa<ConstantSDNode>(Op1)) {
13110 OtherUses.clear();
13111 break;
13114 // FIXME: In some cases, we can be smarter about this.
13115 if (Op1.getValueType() != Offset.getValueType()) {
13116 OtherUses.clear();
13117 break;
13120 OtherUses.push_back(Use.getUser());
13123 if (Swapped)
13124 std::swap(BasePtr, Offset);
13126 // Now check for #3 and #4.
13127 bool RealUse = false;
13129 for (SDNode *Use : Ptr.getNode()->uses()) {
13130 if (Use == N)
13131 continue;
13132 if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13133 return false;
13135 // If Ptr can be folded into the addressing mode of every other use, it's
13136 // not profitable to do this transformation.
13137 if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13138 RealUse = true;
13141 if (!RealUse)
13142 return false;
13144 SDValue Result;
13145 if (isLoad)
13146 Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13147 BasePtr, Offset, AM);
13148 else
13149 Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13150 BasePtr, Offset, AM);
13151 ++PreIndexedNodes;
13152 ++NodesCombined;
13153 LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13154 Result.getNode()->dump(&DAG); dbgs() << '\n');
13155 WorklistRemover DeadNodes(*this);
13156 if (isLoad) {
13157 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13158 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13159 } else {
13160 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13163 // Finally, since the node is now dead, remove it from the graph.
13164 deleteAndRecombine(N);
13166 if (Swapped)
13167 std::swap(BasePtr, Offset);
13169 // Replace other uses of BasePtr that can be updated to use Ptr
13170 for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
13171 unsigned OffsetIdx = 1;
13172 if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13173 OffsetIdx = 0;
13174 assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13175 BasePtr.getNode() && "Expected BasePtr operand");
13177 // We need to replace ptr0 in the following expression:
13178 // x0 * offset0 + y0 * ptr0 = t0
13179 // knowing that
13180 // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13182 // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13183 // indexed load/store and the expression that needs to be re-written.
13185 // Therefore, we have:
13186 // t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1
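// Worked example (a sketch): if the other use is (add ptr0, 16) and the
// new access is pre-incremented by 4 (so t1 = ptr0 + 4), then every sign
// is +1 and the rewrite below computes t0 = (16 - 4) + t1, i.e. (add t1, 12).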
13188 ConstantSDNode *CN =
13189 cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13190 int X0, X1, Y0, Y1;
13191 const APInt &Offset0 = CN->getAPIntValue();
13192 APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13194 X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13195 Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13196 X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13197 Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13199 unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13201 APInt CNV = Offset0;
13202 if (X0 < 0) CNV = -CNV;
13203 if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13204 else CNV = CNV - Offset1;
13206 SDLoc DL(OtherUses[i]);
13208 // We can now generate the new expression.
13209 SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13210 SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13212 SDValue NewUse = DAG.getNode(Opcode,
13214 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13215 DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13216 deleteAndRecombine(OtherUses[i]);
13219 // Replace the uses of Ptr with uses of the updated base value.
13220 DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13221 deleteAndRecombine(Ptr.getNode());
13222 AddToWorklist(Result.getNode());
13224 return true;
13227 /// Try to combine a load/store with an add/sub of the base pointer node into a
13228 /// post-indexed load/store. The transformation effectively folds the add/subtract
13229 /// into the new indexed load/store, and all of its uses are redirected to the
13230 /// new load/store.
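/// For instance (assuming an ARM/AArch64-like target with post-indexed
/// addressing), "ldr x0, [x1]" followed by "add x1, x1, #8" can become the
/// single post-indexed load "ldr x0, [x1], #8".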
13231 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13232 if (Level < AfterLegalizeDAG)
13233 return false;
13235 bool isLoad = true;
13236 SDValue Ptr;
13237 EVT VT;
13238 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
13239 if (LD->isIndexed())
13240 return false;
13241 VT = LD->getMemoryVT();
13242 if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13243 !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13244 return false;
13245 Ptr = LD->getBasePtr();
13246 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
13247 if (ST->isIndexed())
13248 return false;
13249 VT = ST->getMemoryVT();
13250 if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13251 !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13252 return false;
13253 Ptr = ST->getBasePtr();
13254 isLoad = false;
13255 } else {
13256 return false;
13259 if (Ptr.getNode()->hasOneUse())
13260 return false;
13262 for (SDNode *Op : Ptr.getNode()->uses()) {
13263 if (Op == N ||
13264 (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13265 continue;
13267 SDValue BasePtr;
13268 SDValue Offset;
13269 ISD::MemIndexedMode AM = ISD::UNINDEXED;
13270 if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13271 // Don't create an indexed load / store with zero offset.
13272 if (isNullConstant(Offset))
13273 continue;
13275 // Try turning it into a post-indexed load / store except when
13276 // 1) All uses are load / store ops that use it as the base ptr (and
13277 // it may be folded into the addressing mode).
13278 // 2) Op must be independent of N, i.e. Op is neither a predecessor
13279 // nor a successor of N. Otherwise, if Op is folded that would
13280 // create a cycle.
13282 if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13283 continue;
13285 // Check for #1.
13286 bool TryNext = false;
13287 for (SDNode *Use : BasePtr.getNode()->uses()) {
13288 if (Use == Ptr.getNode())
13289 continue;
13291 // If all the uses are load / store addresses, then don't do the
13292 // transformation.
13293 if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13294 bool RealUse = false;
13295 for (SDNode *UseUse : Use->uses()) {
13296 if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13297 RealUse = true;
13300 if (!RealUse) {
13301 TryNext = true;
13302 break;
13307 if (TryNext)
13308 continue;
13310 // Check for #2.
13311 SmallPtrSet<const SDNode *, 32> Visited;
13312 SmallVector<const SDNode *, 8> Worklist;
13313 // Ptr is predecessor to both N and Op.
13314 Visited.insert(Ptr.getNode());
13315 Worklist.push_back(N);
13316 Worklist.push_back(Op);
13317 if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13318 !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13319 SDValue Result = isLoad
13320 ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13321 BasePtr, Offset, AM)
13322 : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13323 BasePtr, Offset, AM);
13324 ++PostIndexedNodes;
13325 ++NodesCombined;
13326 LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13327 dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13328 dbgs() << '\n');
13329 WorklistRemover DeadNodes(*this);
13330 if (isLoad) {
13331 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13332 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13333 } else {
13334 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13337 // Finally, since the node is now dead, remove it from the graph.
13338 deleteAndRecombine(N);
13340 // Replace the uses of Op with uses of the updated base value.
13341 DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13342 Result.getValue(isLoad ? 1 : 0));
13343 deleteAndRecombine(Op);
13344 return true;
13349 return false;
13352 /// Return the base-pointer arithmetic from an indexed \p LD.
13353 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13354 ISD::MemIndexedMode AM = LD->getAddressingMode();
13355 assert(AM != ISD::UNINDEXED);
13356 SDValue BP = LD->getOperand(1);
13357 SDValue Inc = LD->getOperand(2);
13359 // Some backends use TargetConstants for load offsets, but don't expect
13360 // TargetConstants in general ADD nodes. We can convert these constants into
13361 // regular Constants (if the constant is not opaque).
13362 assert((Inc.getOpcode() != ISD::TargetConstant ||
13363 !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13364 "Cannot split out indexing using opaque target constants");
13365 if (Inc.getOpcode() == ISD::TargetConstant) {
13366 ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13367 Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13368 ConstInc->getValueType(0));
13371 unsigned Opc =
13372 (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13373 return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13376 static inline int numVectorEltsOrZero(EVT T) {
13377 return T.isVector() ? T.getVectorNumElements() : 0;
13380 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13381 Val = ST->getValue();
13382 EVT STType = Val.getValueType();
13383 EVT STMemType = ST->getMemoryVT();
13384 if (STType == STMemType)
13385 return true;
13386 if (isTypeLegal(STMemType))
13387 return false; // fail.
13388 if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13389 TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13390 Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13391 return true;
13393 if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13394 STType.isInteger() && STMemType.isInteger()) {
13395 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13396 return true;
13398 if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13399 Val = DAG.getBitcast(STMemType, Val);
13400 return true;
13402 return false; // fail.
13405 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13406 EVT LDMemType = LD->getMemoryVT();
13407 EVT LDType = LD->getValueType(0);
13408 assert(Val.getValueType() == LDMemType &&
13409 "Attempting to extend value of non-matching type");
13410 if (LDType == LDMemType)
13411 return true;
13412 if (LDMemType.isInteger() && LDType.isInteger()) {
13413 switch (LD->getExtensionType()) {
13414 case ISD::NON_EXTLOAD:
13415 Val = DAG.getBitcast(LDType, Val);
13416 return true;
13417 case ISD::EXTLOAD:
13418 Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13419 return true;
13420 case ISD::SEXTLOAD:
13421 Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13422 return true;
13423 case ISD::ZEXTLOAD:
13424 Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13425 return true;
13428 return false;
13431 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13432 if (OptLevel == CodeGenOpt::None || LD->isVolatile())
13433 return SDValue();
13434 SDValue Chain = LD->getOperand(0);
13435 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13436 if (!ST || ST->isVolatile())
13437 return SDValue();
13439 EVT LDType = LD->getValueType(0);
13440 EVT LDMemType = LD->getMemoryVT();
13441 EVT STMemType = ST->getMemoryVT();
13442 EVT STType = ST->getValue().getValueType();
13444 BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13445 BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13446 int64_t Offset;
13447 if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13448 return SDValue();
13450 // Normalize for endianness. After this, Offset=0 will denote that the least
13451 // significant bit in the loaded value maps to the least significant bit in
13452 // the stored value. With Offset=n (for n > 0) the loaded value starts at the
13453 // n-th least significant byte of the stored value.
13454 if (DAG.getDataLayout().isBigEndian())
13455 Offset = (STMemType.getStoreSizeInBits() -
13456 LDMemType.getStoreSizeInBits()) / 8 - Offset;
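// Worked example (a sketch): for a 4-byte store feeding a 1-byte load on a
// big-endian target, an address offset of 3 addresses the least significant
// byte, and it normalizes to Offset = (32 - 8) / 8 - 3 = 0.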
13458 // Check that the stored value covers all bits that are loaded.
13459 bool STCoversLD =
13460 (Offset >= 0) &&
13461 (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13463 auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13464 if (LD->isIndexed()) {
13465 bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13466 LD->getAddressingMode() == ISD::POST_DEC);
13467 unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13468 SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13469 LD->getOperand(1), LD->getOperand(2));
13470 SDValue Ops[] = {Val, Idx, Chain};
13471 return CombineTo(LD, Ops, 3);
13473 return CombineTo(LD, Val, Chain);
13476 if (!STCoversLD)
13477 return SDValue();
13479 // Memory as copy space (potentially masked).
13480 if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13481 // Simple case: Direct non-truncating forwarding
13482 if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13483 return ReplaceLd(LD, ST->getValue(), Chain);
13484 // Can we model the truncate and extension with an and mask?
13485 if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13486 !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13487 // Mask to size of LDMemType
13488 auto Mask =
13489 DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13490 STMemType.getSizeInBits()),
13491 SDLoc(ST), STType);
13492 auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13493 return ReplaceLd(LD, Val, Chain);
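// E.g. (illustrative): an i32 value stored with a truncating store to an
// i16 slot and reloaded by a zero- or any-extending i16 load of i32 type
// is forwarded as (and StoredValue, 0xFFFF), with no memory access.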
13497 // TODO: Deal with nonzero offset.
13498 if (LD->getBasePtr().isUndef() || Offset != 0)
13499 return SDValue();
13500 // Model necessary truncations / extensions.
13501 SDValue Val;
13502 // Truncate Value To Stored Memory Size.
13503 do {
13504 if (!getTruncatedStoreValue(ST, Val))
13505 continue;
13506 if (!isTypeLegal(LDMemType))
13507 continue;
13508 if (STMemType != LDMemType) {
13509 // TODO: Support vectors? This requires extract_subvector/bitcast.
13510 if (!STMemType.isVector() && !LDMemType.isVector() &&
13511 STMemType.isInteger() && LDMemType.isInteger())
13512 Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13513 else
13514 continue;
13516 if (!extendLoadedValueToExtension(LD, Val))
13517 continue;
13518 return ReplaceLd(LD, Val, Chain);
13519 } while (false);
13521 // On failure, clean up dead nodes we may have created.
13522 if (Val->use_empty())
13523 deleteAndRecombine(Val.getNode());
13524 return SDValue();
13527 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13528 LoadSDNode *LD = cast<LoadSDNode>(N);
13529 SDValue Chain = LD->getChain();
13530 SDValue Ptr = LD->getBasePtr();
13532 // If load is not volatile and there are no uses of the loaded value (and
13533 // the updated indexed value in case of indexed loads), change uses of the
13534 // chain value into uses of the chain input (i.e. delete the dead load).
13535 if (!LD->isVolatile()) {
13536 if (N->getValueType(1) == MVT::Other) {
13537 // Unindexed loads.
13538 if (!N->hasAnyUseOfValue(0)) {
13539 // It's not safe to use the two value CombineTo variant here. e.g.
13540 // v1, chain2 = load chain1, loc
13541 // v2, chain3 = load chain2, loc
13542 // v3 = add v2, c
13543 // Now we replace use of chain2 with chain1. This makes the second load
13544 // isomorphic to the one we are deleting, and thus makes this load live.
13545 LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
13546 dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
13547 dbgs() << "\n");
13548 WorklistRemover DeadNodes(*this);
13549 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
13550 AddUsersToWorklist(Chain.getNode());
13551 if (N->use_empty())
13552 deleteAndRecombine(N);
13554 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13556 } else {
13557 // Indexed loads.
13558 assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
13560 // If this load has an opaque TargetConstant offset, then we cannot split
13561 // the indexing into an add/sub directly (that TargetConstant may not be
13562 // valid for a different type of node, and we cannot convert an opaque
13563 // target constant into a regular constant).
13564 bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
13565 cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
13567 if (!N->hasAnyUseOfValue(0) &&
13568 ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
13569 SDValue Undef = DAG.getUNDEF(N->getValueType(0));
13570 SDValue Index;
13571 if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
13572 Index = SplitIndexingFromLoad(LD);
13573 // Try to fold the base pointer arithmetic into subsequent loads and
13574 // stores.
13575 AddUsersToWorklist(N);
13576 } else
13577 Index = DAG.getUNDEF(N->getValueType(1));
13578 LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
13579 dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
13580 dbgs() << " and 2 other values\n");
13581 WorklistRemover DeadNodes(*this);
13582 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
13583 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
13584 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
13585 deleteAndRecombine(N);
13586 return SDValue(N, 0); // Return N so it doesn't get rechecked!
13591 // If this load is directly stored, replace the load value with the stored
13592 // value.
13593 if (auto V = ForwardStoreValueToDirectLoad(LD))
13594 return V;
13596 // Try to infer better alignment information than the load already has.
13597 if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
13598 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
13599 if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
13600 SDValue NewLoad = DAG.getExtLoad(
13601 LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
13602 LD->getPointerInfo(), LD->getMemoryVT(), Align,
13603 LD->getMemOperand()->getFlags(), LD->getAAInfo());
13604 // NewLoad will always be N as we are only refining the alignment
13605 assert(NewLoad.getNode() == N);
13606 (void)NewLoad;
13611 if (LD->isUnindexed()) {
13612 // Walk up chain skipping non-aliasing memory nodes.
13613 SDValue BetterChain = FindBetterChain(LD, Chain);
13615 // If there is a better chain.
13616 if (Chain != BetterChain) {
13617 SDValue ReplLoad;
13619 // Replace the chain to avoid dependency.
13620 if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
13621 ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
13622 BetterChain, Ptr, LD->getMemOperand());
13623 } else {
13624 ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
13625 LD->getValueType(0),
13626 BetterChain, Ptr, LD->getMemoryVT(),
13627 LD->getMemOperand());
13630 // Create token factor to keep old chain connected.
13631 SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
13632 MVT::Other, Chain, ReplLoad.getValue(1));
13634 // Replace uses with load result and token factor
13635 return CombineTo(N, ReplLoad.getValue(0), Token);
13639 // Try transforming N to an indexed load.
13640 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
13641 return SDValue(N, 0);
13643 // Try to slice up N to more direct loads if the slices are mapped to
13644 // different register banks or pairing can take place.
13645 if (SliceUpLoad(N))
13646 return SDValue(N, 0);
13648 return SDValue();
13651 namespace {
13653 /// Helper structure used to slice a load in smaller loads.
13654 /// Basically a slice is obtained from the following sequence:
13655 /// Origin = load Ty1, Base
13656 /// Shift = srl Ty1 Origin, CstTy Amount
13657 /// Inst = trunc Shift to Ty2
13659 /// Then, it will be rewritten into:
13660 /// Slice = load SliceTy, Base + SliceOffset
13661 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
13663 /// SliceTy is deduced from the number of bits that are actually used to
13664 /// build Inst.
13665 struct LoadedSlice {
13666 /// Helper structure used to compute the cost of a slice.
13667 struct Cost {
13668 /// Are we optimizing for code size.
13669 bool ForCodeSize;
13671 /// Various costs.
13672 unsigned Loads = 0;
13673 unsigned Truncates = 0;
13674 unsigned CrossRegisterBanksCopies = 0;
13675 unsigned ZExts = 0;
13676 unsigned Shift = 0;
13678 Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
13680 /// Get the cost of one isolated slice.
13681 Cost(const LoadedSlice &LS, bool ForCodeSize = false)
13682 : ForCodeSize(ForCodeSize), Loads(1) {
13683 EVT TruncType = LS.Inst->getValueType(0);
13684 EVT LoadedType = LS.getLoadedType();
13685 if (TruncType != LoadedType &&
13686 !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
13687 ZExts = 1;
13690 /// Account for slicing gain in the current cost.
13691 /// Slicing provides a few gains, like removing a shift or a
13692 /// truncate. This method allows growing the cost of the original
13693 /// load by the gain from this slice.
13694 void addSliceGain(const LoadedSlice &LS) {
13695 // Each slice saves a truncate.
13696 const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
13697 if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
13698 LS.Inst->getValueType(0)))
13699 ++Truncates;
13700 // If there is a shift amount, this slice gets rid of it.
13701 if (LS.Shift)
13702 ++Shift;
13703 // If this slice can merge a cross register bank copy, account for it.
13704 if (LS.canMergeExpensiveCrossRegisterBankCopy())
13705 ++CrossRegisterBanksCopies;
13708 Cost &operator+=(const Cost &RHS) {
13709 Loads += RHS.Loads;
13710 Truncates += RHS.Truncates;
13711 CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
13712 ZExts += RHS.ZExts;
13713 Shift += RHS.Shift;
13714 return *this;
13717 bool operator==(const Cost &RHS) const {
13718 return Loads == RHS.Loads && Truncates == RHS.Truncates &&
13719 CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
13720 ZExts == RHS.ZExts && Shift == RHS.Shift;
13723 bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
13725 bool operator<(const Cost &RHS) const {
13726 // Assume cross register banks copies are as expensive as loads.
13727 // FIXME: Do we want some more target hooks?
13728 unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
13729 unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
13730 // Unless we are optimizing for code size, consider the
13731 // expensive operation first.
13732 if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
13733 return ExpensiveOpsLHS < ExpensiveOpsRHS;
13734 return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
13735 (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
13738 bool operator>(const Cost &RHS) const { return RHS < *this; }
13740 bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
13742 bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
13745 // The last instruction that represents the slice. This should be a
13746 // truncate instruction.
13747 SDNode *Inst;
13749 // The original load instruction.
13750 LoadSDNode *Origin;
13752 // The right shift amount in bits from the original load.
13753 unsigned Shift;
13755 // The DAG from which Origin comes.
13756 // This is used to get some contextual information about legal types, etc.
13757 SelectionDAG *DAG;
13759 LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
13760 unsigned Shift = 0, SelectionDAG *DAG = nullptr)
13761 : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
13763 /// Get the bits used in a chunk of bits \p BitWidth large.
13764 /// \return Result is \p BitWidth bits wide, with used bits set to 1
13765 /// and unused bits set to 0.
13766 APInt getUsedBits() const {
13767 // Reproduce the trunc(lshr) sequence:
13768 // - Start from the truncated value.
13769 // - Zero extend to the desired bit width.
13770 // - Shift left.
13771 assert(Origin && "No original load to compare against.");
13772 unsigned BitWidth = Origin->getValueSizeInBits(0);
13773 assert(Inst && "This slice is not bound to an instruction");
13774 assert(Inst->getValueSizeInBits(0) <= BitWidth &&
13775 "Extracted slice is bigger than the whole type!");
13776 APInt UsedBits(Inst->getValueSizeInBits(0), 0);
13777 UsedBits.setAllBits();
13778 UsedBits = UsedBits.zext(BitWidth);
13779 UsedBits <<= Shift;
13780 return UsedBits;
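// For example (illustrative values): an i8 slice taken at Shift = 8 from
// an i32 load yields UsedBits = 0x0000FF00.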
13783 /// Get the size of the slice to be loaded in bytes.
13784 unsigned getLoadedSize() const {
13785 unsigned SliceSize = getUsedBits().countPopulation();
13786 assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
13787 return SliceSize / 8;
13790 /// Get the type that will be loaded for this slice.
13791 /// Note: This may not be the final type for the slice.
13792 EVT getLoadedType() const {
13793 assert(DAG && "Missing context");
13794 LLVMContext &Ctxt = *DAG->getContext();
13795 return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
13798 /// Get the alignment of the load used for this slice.
13799 unsigned getAlignment() const {
13800 unsigned Alignment = Origin->getAlignment();
13801 unsigned Offset = getOffsetFromBase();
13802 if (Offset != 0)
13803 Alignment = MinAlign(Alignment, Alignment + Offset);
13804 return Alignment;
13807 /// Check if this slice can be rewritten with legal operations.
13808 bool isLegal() const {
13809 // An invalid slice is not legal.
13810 if (!Origin || !Inst || !DAG)
13811 return false;
13813 // Offsets apply only to indexed loads; we do not handle those.
13814 if (!Origin->getOffset().isUndef())
13815 return false;
13817 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13819 // Check that the type is legal.
13820 EVT SliceType = getLoadedType();
13821 if (!TLI.isTypeLegal(SliceType))
13822 return false;
13824 // Check that the load is legal for this type.
13825 if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
13826 return false;
13828 // Check that the offset can be computed.
13829 // 1. Check its type.
13830 EVT PtrType = Origin->getBasePtr().getValueType();
13831 if (PtrType == MVT::Untyped || PtrType.isExtended())
13832 return false;
13834 // 2. Check that it fits in the immediate.
13835 if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
13836 return false;
13838 // 3. Check that the computation is legal.
13839 if (!TLI.isOperationLegal(ISD::ADD, PtrType))
13840 return false;
13842 // Check that the zext is legal if it needs one.
13843 EVT TruncateType = Inst->getValueType(0);
13844 if (TruncateType != SliceType &&
13845 !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
13846 return false;
13848 return true;
13851 /// Get the offset in bytes of this slice in the original chunk of
13852 /// bits.
13853 /// \pre DAG != nullptr.
13854 uint64_t getOffsetFromBase() const {
13855 assert(DAG && "Missing context.");
13856 bool IsBigEndian = DAG->getDataLayout().isBigEndian();
13857 assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
13858 uint64_t Offset = Shift / 8;
13859 unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
13860 assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
13861 "The size of the original loaded type is not a multiple of a"
13862 " byte.");
13863 // If Offset is bigger than TySizeInBytes, it means we are loading all
13864 // zeros. This should have been optimized away earlier in the process.
13865 assert(TySizeInBytes > Offset &&
13866 "Invalid shift amount for given loaded size");
13867 if (IsBigEndian)
13868 Offset = TySizeInBytes - Offset - getLoadedSize();
13869 return Offset;
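// For example (illustrative values): a 2-byte slice at Shift = 16 of a
// 4-byte load sits at byte offset 2 on little-endian targets, but at
// byte offset 0 on big-endian targets.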
13872 /// Generate the sequence of instructions to load the slice
13873 /// represented by this object and redirect the uses of this slice to
13874 /// this new sequence of instructions.
13875 /// \pre this->Inst && this->Origin are valid Instructions and this
13876 /// object passed the legal check: LoadedSlice::isLegal returned true.
13877 /// \return The last instruction of the sequence used to load the slice.
13878 SDValue loadSlice() const {
13879 assert(Inst && Origin && "Unable to replace a non-existing slice.");
13880 const SDValue &OldBaseAddr = Origin->getBasePtr();
13881 SDValue BaseAddr = OldBaseAddr;
13882 // Get the offset in that chunk of bytes w.r.t. the endianness.
13883 int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
13884 assert(Offset >= 0 && "Offset too big to fit in int64_t!");
13885 if (Offset) {
13886 // BaseAddr = BaseAddr + Offset.
13887 EVT ArithType = BaseAddr.getValueType();
13888 SDLoc DL(Origin);
13889 BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
13890 DAG->getConstant(Offset, DL, ArithType));
13893 // Create the type of the loaded slice according to its size.
13894 EVT SliceType = getLoadedType();
13896 // Create the load for the slice.
13897 SDValue LastInst =
13898 DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
13899 Origin->getPointerInfo().getWithOffset(Offset),
13900 getAlignment(), Origin->getMemOperand()->getFlags());
13901 // If the final type is not the same as the loaded type, this means that
13902 // we have to pad with zero. Create a zero extend for that.
13903 EVT FinalType = Inst->getValueType(0);
13904 if (SliceType != FinalType)
13905 LastInst =
13906 DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
13907 return LastInst;
13910 /// Check if this slice can be merged with an expensive cross register
13911 /// bank copy. E.g.,
13912 /// i = load i32
13913 /// f = bitcast i32 i to float
13914 bool canMergeExpensiveCrossRegisterBankCopy() const {
13915 if (!Inst || !Inst->hasOneUse())
13916 return false;
13917 SDNode *Use = *Inst->use_begin();
13918 if (Use->getOpcode() != ISD::BITCAST)
13919 return false;
13920 assert(DAG && "Missing context");
13921 const TargetLowering &TLI = DAG->getTargetLoweringInfo();
13922 EVT ResVT = Use->getValueType(0);
13923 const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
13924 const TargetRegisterClass *ArgRC =
13925 TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
13926 if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
13927 return false;
13929 // At this point, we know that we perform a cross-register-bank copy.
13930 // Check if it is expensive.
13931 const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
13932 // Assume bitcasts are cheap, unless both register classes do not
13933 // explicitly share a common sub class.
13934 if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
13935 return false;
13937 // Check if it will be merged with the load.
13938 // 1. Check the alignment constraint.
13939 unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
13940 ResVT.getTypeForEVT(*DAG->getContext()));
13942 if (RequiredAlignment > getAlignment())
13943 return false;
13945 // 2. Check that the load is a legal operation for that type.
13946 if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
13947 return false;
13949 // 3. Check that we do not have a zext in the way.
13950 if (Inst->getValueType(0) != getLoadedType())
13951 return false;
13953 return true;
13957 } // end anonymous namespace
13959 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
13960 /// \p UsedBits looks like 0..0 1..1 0..0.
13961 static bool areUsedBitsDense(const APInt &UsedBits) {
13962 // If all the bits are one, this is dense!
13963 if (UsedBits.isAllOnesValue())
13964 return true;
13966 // Get rid of the unused bits on the right.
13967 APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
13968 // Get rid of the unused bits on the left.
13969 if (NarrowedUsedBits.countLeadingZeros())
13970 NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
13971 // Check that the chunk of bits is completely used.
13972 return NarrowedUsedBits.isAllOnesValue();
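// For example (illustrative values): 0b00111100 is dense, while
// 0b00100100 is not, since its used bits are separated by an unused
// hole.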
13975 /// Check whether or not \p First and \p Second are next to each other
13976 /// in memory. This means that there is no hole between the bits loaded
13977 /// by \p First and the bits loaded by \p Second.
13978 static bool areSlicesNextToEachOther(const LoadedSlice &First,
13979 const LoadedSlice &Second) {
13980 assert(First.Origin == Second.Origin && First.Origin &&
13981 "Unable to match different memory origins.");
13982 APInt UsedBits = First.getUsedBits();
13983 assert((UsedBits & Second.getUsedBits()) == 0 &&
13984 "Slices are not supposed to overlap.");
13985 UsedBits |= Second.getUsedBits();
13986 return areUsedBitsDense(UsedBits);
13989 /// Adjust the \p GlobalLSCost according to the target
13990 /// pairing capabilities and the layout of the slices.
13991 /// \pre \p GlobalLSCost should account for at least as many loads as
13992 /// there are slices in \p LoadedSlices.
13993 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
13994 LoadedSlice::Cost &GlobalLSCost) {
13995 unsigned NumberOfSlices = LoadedSlices.size();
13996 // If there are fewer than 2 elements, no pairing is possible.
13997 if (NumberOfSlices < 2)
13998 return;
14000 // Sort the slices so that elements that are likely to be next to each
14001 // other in memory are next to each other in the list.
14002 llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14003 assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14004 return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14006 const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14007 // First (resp. Second) is the first (resp. second) potential candidate
14008 // to be placed in a paired load.
14009 const LoadedSlice *First = nullptr;
14010 const LoadedSlice *Second = nullptr;
14011 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14012 // Set the beginning of the pair.
14013 First = Second) {
14014 Second = &LoadedSlices[CurrSlice];
14016 // If First is NULL, it means we start a new pair.
14017 // Get to the next slice.
14018 if (!First)
14019 continue;
14021 EVT LoadedType = First->getLoadedType();
14023 // If the types of the slices are different, we cannot pair them.
14024 if (LoadedType != Second->getLoadedType())
14025 continue;
14027 // Check if the target supplies paired loads for this type.
14028 unsigned RequiredAlignment = 0;
14029 if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14030 // Move to the next pair; this type is hopeless.
14031 Second = nullptr;
14032 continue;
14034 // Check if we meet the alignment requirement.
14035 if (RequiredAlignment > First->getAlignment())
14036 continue;
14038 // Check that both loads are next to each other in memory.
14039 if (!areSlicesNextToEachOther(*First, *Second))
14040 continue;
14042 assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14043 --GlobalLSCost.Loads;
14044 // Move to the next pair.
14045 Second = nullptr;
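// For example (hypothetical target): two adjacent i16 slices on a target
// that provides a paired 2 x i16 load are charged as a single load in
// GlobalLSCost.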
14049 /// Check the profitability of all involved LoadedSlice.
14050 /// Currently, it is considered profitable if there are exactly two
14051 /// involved slices (1) which are (2) next to each other in memory, and
14052 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14054 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14055 /// the elements themselves.
14057 /// FIXME: When the cost model will be mature enough, we can relax
14058 /// constraints (1) and (2).
14059 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14060 const APInt &UsedBits, bool ForCodeSize) {
14061 unsigned NumberOfSlices = LoadedSlices.size();
14062 if (StressLoadSlicing)
14063 return NumberOfSlices > 1;
14065 // Check (1).
14066 if (NumberOfSlices != 2)
14067 return false;
14069 // Check (2).
14070 if (!areUsedBitsDense(UsedBits))
14071 return false;
14073 // Check (3).
14074 LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14075 // The original code has one big load.
14076 OrigCost.Loads = 1;
14077 for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14078 const LoadedSlice &LS = LoadedSlices[CurrSlice];
14079 // Accumulate the cost of all the slices.
14080 LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14081 GlobalSlicingCost += SliceCost;
14083 // Account as cost in the original configuration the gain obtained
14084 // with the current slices.
14085 OrigCost.addSliceGain(LS);
14088 // If the target supports paired load, adjust the cost accordingly.
14089 adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14090 return OrigCost > GlobalSlicingCost;
14093 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14094 /// operations, split it into the various pieces being extracted.
14096 /// This sort of thing is introduced by SROA.
14097 /// This slicing takes care not to insert overlapping loads.
14098 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14099 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14100 if (Level < AfterLegalizeDAG)
14101 return false;
14103 LoadSDNode *LD = cast<LoadSDNode>(N);
14104 if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14105 !LD->getValueType(0).isInteger())
14106 return false;
14108 // Keep track of already used bits to detect overlapping values.
14109 // In that case, we will just abort the transformation.
14110 APInt UsedBits(LD->getValueSizeInBits(0), 0);
14112 SmallVector<LoadedSlice, 4> LoadedSlices;
14114 // Check if this load is used as several smaller chunks of bits.
14115 // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14116 // of computation for each trunc.
14117 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14118 UI != UIEnd; ++UI) {
14119 // Skip the uses of the chain.
14120 if (UI.getUse().getResNo() != 0)
14121 continue;
14123 SDNode *User = *UI;
14124 unsigned Shift = 0;
14126 // Check if this is a trunc(lshr).
14127 if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14128 isa<ConstantSDNode>(User->getOperand(1))) {
14129 Shift = User->getConstantOperandVal(1);
14130 User = *User->use_begin();
14133 // At this point, User is a TRUNCATE iff we encountered trunc or
14134 // trunc(lshr).
14135 if (User->getOpcode() != ISD::TRUNCATE)
14136 return false;
14138 // The width of the type must be a power of 2 and at least 8 bits.
14139 // Otherwise the load cannot be represented in LLVM IR.
14140 // Moreover, if the shift amount is not a multiple of 8 bits, the slice
14141 // would not start on a byte boundary. We do not support that.
14142 unsigned Width = User->getValueSizeInBits(0);
14143 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14144 return false;
14146 // Build the slice for this chain of computations.
14147 LoadedSlice LS(User, LD, Shift, &DAG);
14148 APInt CurrentUsedBits = LS.getUsedBits();
14150 // Check if this slice overlaps with another.
14151 if ((CurrentUsedBits & UsedBits) != 0)
14152 return false;
14153 // Update the bits used globally.
14154 UsedBits |= CurrentUsedBits;
14156 // Check if the new slice would be legal.
14157 if (!LS.isLegal())
14158 return false;
14160 // Record the slice.
14161 LoadedSlices.push_back(LS);
14164 // Abort slicing if it does not seem to be profitable.
14165 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14166 return false;
14168 ++SlicedLoads;
14170 // Rewrite each chain to use an independent load.
14171 // By construction, each chain can be represented by a unique load.
14173 // Prepare the argument for the new token factor for all the slices.
14174 SmallVector<SDValue, 8> ArgChains;
14175 for (SmallVectorImpl<LoadedSlice>::const_iterator
14176 LSIt = LoadedSlices.begin(),
14177 LSItEnd = LoadedSlices.end();
14178 LSIt != LSItEnd; ++LSIt) {
14179 SDValue SliceInst = LSIt->loadSlice();
14180 CombineTo(LSIt->Inst, SliceInst, true);
14181 if (SliceInst.getOpcode() != ISD::LOAD)
14182 SliceInst = SliceInst.getOperand(0);
14183 assert(SliceInst->getOpcode() == ISD::LOAD &&
14184 "It takes more than a zext to get to the loaded slice!!");
14185 ArgChains.push_back(SliceInst.getValue(1));
14188 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14189 ArgChains);
14190 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14191 AddToWorklist(Chain.getNode());
14192 return true;
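// Illustrative end-to-end rewrite (types assumed):
//   %w  = load i32, p
//   %lo = trunc %w to i16
//   %hi = trunc (srl %w, 16) to i16
// becomes, on a little-endian target,
//   %lo = load i16, p
//   %hi = load i16, p+2
// with a TokenFactor joining the two new load chains.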
14195 /// Check to see if V is (and load (ptr), imm), where the load is having
14196 /// specific bytes cleared out. If so, return the byte size being masked out
14197 /// and the shift amount.
14198 static std::pair<unsigned, unsigned>
14199 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14200 std::pair<unsigned, unsigned> Result(0, 0);
14202 // Check for the structure we're looking for.
14203 if (V->getOpcode() != ISD::AND ||
14204 !isa<ConstantSDNode>(V->getOperand(1)) ||
14205 !ISD::isNormalLoad(V->getOperand(0).getNode()))
14206 return Result;
14208 // Check the chain and pointer.
14209 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14210 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
14212 // This only handles simple types.
14213 if (V.getValueType() != MVT::i16 &&
14214 V.getValueType() != MVT::i32 &&
14215 V.getValueType() != MVT::i64)
14216 return Result;
14218 // Check the constant mask. Invert it so that the bits being masked out are
14219 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
14220 // follow the sign bit for uniformity.
14221 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14222 unsigned NotMaskLZ = countLeadingZeros(NotMask);
14223 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
14224 unsigned NotMaskTZ = countTrailingZeros(NotMask);
14225 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
14226 if (NotMaskLZ == 64) return Result; // All zero mask.
14228 // See if we have a contiguous run of bits, i.e. the mask has the form 0*1+0*.
14229 if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14230 return Result;
14232 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14233 if (V.getValueType() != MVT::i64 && NotMaskLZ)
14234 NotMaskLZ -= 64-V.getValueSizeInBits();
14236 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14237 switch (MaskedBytes) {
14238 case 1:
14239 case 2:
14240 case 4: break;
14241 default: return Result; // All one mask, or 5-byte mask.
14244 // Verify that the first bit starts at a multiple of mask so that the access
14245 // is aligned the same as the access width.
14246 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14248 // For narrowing to be valid, the load must be the memory operation
14249 // immediately preceding the store.
14250 if (LD == Chain.getNode())
14251 ; // ok.
14252 else if (Chain->getOpcode() == ISD::TokenFactor &&
14253 SDValue(LD, 1).hasOneUse()) {
14254 // LD has only 1 chain use, so there are no indirect dependencies.
14255 bool isOk = false;
14256 for (const SDValue &ChainOp : Chain->op_values())
14257 if (ChainOp.getNode() == LD) {
14258 isOk = true;
14259 break;
14261 if (!isOk)
14262 return Result;
14263 } else
14264 return Result; // Fail.
14266 Result.first = MaskedBytes;
14267 Result.second = NotMaskTZ/8;
14268 return Result;
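// Worked example (illustrative values): for
//   V = (and (load i32 p), 0xFFFF00FF)
// we get NotMask = 0x0000FF00, so exactly MaskedBytes = 1 byte is masked
// out at ByteShift = 1 byte from the bottom, and the result is {1, 1}.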
14271 /// Check to see if IVal is something that provides a value as specified by
14272 /// MaskInfo. If so, replace the specified store with a narrower store of
14273 /// truncated IVal.
14274 static SDNode *
14275 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14276 SDValue IVal, StoreSDNode *St,
14277 DAGCombiner *DC) {
14278 unsigned NumBytes = MaskInfo.first;
14279 unsigned ByteShift = MaskInfo.second;
14280 SelectionDAG &DAG = DC->getDAG();
14282 // Check to see if IVal is all zeros in the part being masked in by the 'or'
14283 // that uses this. If not, this is not a replacement.
14284 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14285 ByteShift*8, (ByteShift+NumBytes)*8);
14286 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
14288 // Check that it is legal on the target to do this. It is legal if the new
14289 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14290 // legalization.
14291 MVT VT = MVT::getIntegerVT(NumBytes*8);
14292 if (!DC->isTypeLegal(VT))
14293 return nullptr;
14295 // Okay, we can do this! Replace the 'St' store with a store of IVal that is
14296 // shifted by ByteShift and truncated down to NumBytes.
14297 if (ByteShift) {
14298 SDLoc DL(IVal);
14299 IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14300 DAG.getConstant(ByteShift*8, DL,
14301 DC->getShiftAmountTy(IVal.getValueType())));
14304 // Figure out the offset for the store and the alignment of the access.
14305 unsigned StOffset;
14306 unsigned NewAlign = St->getAlignment();
14308 if (DAG.getDataLayout().isLittleEndian())
14309 StOffset = ByteShift;
14310 else
14311 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14313 SDValue Ptr = St->getBasePtr();
14314 if (StOffset) {
14315 SDLoc DL(IVal);
14316 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14317 Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14318 NewAlign = MinAlign(NewAlign, StOffset);
14321 // Truncate down to the new size.
14322 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14324 ++OpsNarrowed;
14325 return DAG
14326 .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14327 St->getPointerInfo().getWithOffset(StOffset), NewAlign)
14328 .getNode();
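// Hedged sketch of the rewrite this enables (little-endian, values
// assumed): with MaskInfo = {1, 1} as in the example above,
//   store (or (and (load i32 p), 0xFFFF00FF), (shl (and %y, 0xFF), 8)), p
// becomes a one-byte store of (trunc %y to i8) at p+1, leaving the wide
// load and the 'or' dead.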
14331 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14332 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14333 /// narrowing the load and store if it would end up being a win for performance
14334 /// or code size.
14335 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14336 StoreSDNode *ST = cast<StoreSDNode>(N);
14337 if (ST->isVolatile())
14338 return SDValue();
14340 SDValue Chain = ST->getChain();
14341 SDValue Value = ST->getValue();
14342 SDValue Ptr = ST->getBasePtr();
14343 EVT VT = Value.getValueType();
14345 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14346 return SDValue();
14348 unsigned Opc = Value.getOpcode();
14350 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14351 // is a byte mask indicating a consecutive number of bytes, check to see if
14352 // Y is known to provide just those bytes. If so, we try to replace the
14353 // load + or + store sequence with a single (narrower) store, which makes
14354 // the load dead.
14355 if (Opc == ISD::OR) {
14356 std::pair<unsigned, unsigned> MaskedLoad;
14357 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14358 if (MaskedLoad.first)
14359 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14360 Value.getOperand(1), ST,this))
14361 return SDValue(NewST, 0);
14363 // Or is commutative, so try swapping X and Y.
14364 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14365 if (MaskedLoad.first)
14366 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14367 Value.getOperand(0), ST,this))
14368 return SDValue(NewST, 0);
14371 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14372 Value.getOperand(1).getOpcode() != ISD::Constant)
14373 return SDValue();
14375 SDValue N0 = Value.getOperand(0);
14376 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14377 Chain == SDValue(N0.getNode(), 1)) {
14378 LoadSDNode *LD = cast<LoadSDNode>(N0);
14379 if (LD->getBasePtr() != Ptr ||
14380 LD->getPointerInfo().getAddrSpace() !=
14381 ST->getPointerInfo().getAddrSpace())
14382 return SDValue();
14384 // Find the type to which to narrow the load / op / store.
14385 SDValue N1 = Value.getOperand(1);
14386 unsigned BitWidth = N1.getValueSizeInBits();
14387 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14388 if (Opc == ISD::AND)
14389 Imm ^= APInt::getAllOnesValue(BitWidth);
14390 if (Imm == 0 || Imm.isAllOnesValue())
14391 return SDValue();
14392 unsigned ShAmt = Imm.countTrailingZeros();
14393 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14394 unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14395 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14396 // The narrowing should be profitable, the load/store operation should be
14397 // legal (or custom) and the store size should be equal to the NewVT width.
14398 while (NewBW < BitWidth &&
14399 (NewVT.getStoreSizeInBits() != NewBW ||
14400 !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14401 !TLI.isNarrowingProfitable(VT, NewVT))) {
14402 NewBW = NextPowerOf2(NewBW);
14403 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14405 if (NewBW >= BitWidth)
14406 return SDValue();
14408 // If the lowest changed bit does not start on a NewBW-sized boundary,
14409 // start at the previous one.
14410 if (ShAmt % NewBW)
14411 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14412 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14413 std::min(BitWidth, ShAmt + NewBW));
14414 if ((Imm & Mask) == Imm) {
14415 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14416 if (Opc == ISD::AND)
14417 NewImm ^= APInt::getAllOnesValue(NewBW);
14418 uint64_t PtrOff = ShAmt / 8;
14419 // For big endian targets, we need to adjust the offset to the pointer to
14420 // load the correct bytes.
14421 if (DAG.getDataLayout().isBigEndian())
14422 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14424 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14425 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14426 if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14427 return SDValue();
14429 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14430 Ptr.getValueType(), Ptr,
14431 DAG.getConstant(PtrOff, SDLoc(LD),
14432 Ptr.getValueType()));
14433 SDValue NewLD =
14434 DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14435 LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14436 LD->getMemOperand()->getFlags(), LD->getAAInfo());
14437 SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14438 DAG.getConstant(NewImm, SDLoc(Value),
14439 NewVT));
14440 SDValue NewST =
14441 DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14442 ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14444 AddToWorklist(NewPtr.getNode());
14445 AddToWorklist(NewLD.getNode());
14446 AddToWorklist(NewVal.getNode());
14447 WorklistRemover DeadNodes(*this);
14448 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14449 ++OpsNarrowed;
14450 return NewST;
14454 return SDValue();
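// Worked example for the narrowing above (illustrative, little-endian):
//   store (xor (load i32 p), 0x00FF0000), p
// has ShAmt = 16 and MSB = 23, so NewBW = 8 and the sequence becomes
//   store (xor (load i8 p+2), 0xFF), p+2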
14457 /// For a given floating point load / store pair, if the load value isn't used
14458 /// by any other operations, then consider transforming the pair to integer
14459 /// load / store operations if the target deems the transformation profitable.
14460 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14461 StoreSDNode *ST = cast<StoreSDNode>(N);
14462 SDValue Chain = ST->getChain();
14463 SDValue Value = ST->getValue();
14464 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14465 Value.hasOneUse() &&
14466 Chain == SDValue(Value.getNode(), 1)) {
14467 LoadSDNode *LD = cast<LoadSDNode>(Value);
14468 EVT VT = LD->getMemoryVT();
14469 if (!VT.isFloatingPoint() ||
14470 VT != ST->getMemoryVT() ||
14471 LD->isNonTemporal() ||
14472 ST->isNonTemporal() ||
14473 LD->getPointerInfo().getAddrSpace() != 0 ||
14474 ST->getPointerInfo().getAddrSpace() != 0)
14475 return SDValue();
14477 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14478 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14479 !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14480 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14481 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14482 return SDValue();
14484 unsigned LDAlign = LD->getAlignment();
14485 unsigned STAlign = ST->getAlignment();
14486 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14487 unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14488 if (LDAlign < ABIAlign || STAlign < ABIAlign)
14489 return SDValue();
14491 SDValue NewLD =
14492 DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14493 LD->getPointerInfo(), LDAlign);
14495 SDValue NewST =
14496 DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
14497 ST->getPointerInfo(), STAlign);
14499 AddToWorklist(NewLD.getNode());
14500 AddToWorklist(NewST.getNode());
14501 WorklistRemover DeadNodes(*this);
14502 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14503 ++LdStFP2Int;
14504 return NewST;
14507 return SDValue();
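// Sketch of the transformation (profitability is target-dependent):
//   %f = load float, p
//   store float %f, q
// becomes
//   %i = load i32, p
//   store i32 %i, q
// when the integer load/store is legal, the target deems it desirable,
// and both accesses meet the i32 ABI alignment.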
14510 // This is a helper function for visitMUL to check the profitability
14511 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14512 // MulNode is the original multiply, AddNode is (add x, c1),
14513 // and ConstNode is c2.
14515 // If the (add x, c1) has multiple uses, we could increase
14516 // the number of adds if we make this transformation.
14517 // It would only be worth doing this if we can remove a
14518 // multiply in the process. Check for that here.
14519 // To illustrate:
14520 // (A + c1) * c3
14521 // (A + c2) * c3
14522 // We're checking for cases where we have common "c3 * A" expressions.
14523 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14524 SDValue &AddNode,
14525 SDValue &ConstNode) {
14526 APInt Val;
14528 // If the add only has one use, this would be OK to do.
14529 if (AddNode.getNode()->hasOneUse())
14530 return true;
14532 // Walk all the users of the constant with which we're multiplying.
14533 for (SDNode *Use : ConstNode->uses()) {
14534 if (Use == MulNode) // This use is the one we're on right now. Skip it.
14535 continue;
14537 if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
14538 SDNode *OtherOp;
14539 SDNode *MulVar = AddNode.getOperand(0).getNode();
14541 // OtherOp is what we're multiplying against the constant.
14542 if (Use->getOperand(0) == ConstNode)
14543 OtherOp = Use->getOperand(1).getNode();
14544 else
14545 OtherOp = Use->getOperand(0).getNode();
14547 // Check to see if multiply is with the same operand of our "add".
14549 // ConstNode = CONST
14550 // Use = ConstNode * A <-- visiting Use. OtherOp is A.
14551 // ...
14552 // AddNode = (A + c1) <-- MulVar is A.
14553 // = AddNode * ConstNode <-- current visiting instruction.
14555 // If we make this transformation, we will have a common
14556 // multiply (ConstNode * A) that we can save.
14557 if (OtherOp == MulVar)
14558 return true;
14560 // Now check to see if a future expansion will give us a common
14561 // multiply.
14563 // ConstNode = CONST
14564 // AddNode = (A + c1)
14565 // ... = AddNode * ConstNode <-- current visiting instruction.
14566 // ...
14567 // OtherOp = (A + c2)
14568 // Use = OtherOp * ConstNode <-- visiting Use.
14570 // If we make this transformation, we will have a common
14571 // multiply (CONST * A) after we also do the same transformation
14572 // to the "Use" instruction.
14573 if (OtherOp->getOpcode() == ISD::ADD &&
14574 DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
14575 OtherOp->getOperand(0).getNode() == MulVar)
14576 return true;
14580 // Didn't find a case where this would be profitable.
14581 return false;
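// Concrete instance (constants assumed): rewriting both
//   (mul (add %A, 10), 5) and (mul (add %A, 20), 5)
// into (add (mul %A, 5), 50) and (add (mul %A, 5), 100) exposes a
// common (mul %A, 5) that can be shared, so the fold is profitable.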
14584 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
14585 unsigned NumStores) {
14586 SmallVector<SDValue, 8> Chains;
14587 SmallPtrSet<const SDNode *, 8> Visited;
14588 SDLoc StoreDL(StoreNodes[0].MemNode);
14590 for (unsigned i = 0; i < NumStores; ++i) {
14591 Visited.insert(StoreNodes[i].MemNode);
14594 // Don't include chains that are other candidate stores or duplicates.
14595 for (unsigned i = 0; i < NumStores; ++i) {
14596 if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
14597 Chains.push_back(StoreNodes[i].MemNode->getChain());
14600 assert(Chains.size() > 0 && "Chain should have generated a chain");
14601 return DAG.getTokenFactor(StoreDL, Chains);
14604 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
14605 SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
14606 bool IsConstantSrc, bool UseVector, bool UseTrunc) {
14607 // Make sure we have something to merge.
14608 if (NumStores < 2)
14609 return false;
14611 // The latest Node in the DAG.
14612 SDLoc DL(StoreNodes[0].MemNode);
14614 int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
14615 unsigned SizeInBits = NumStores * ElementSizeBits;
14616 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14618 EVT StoreTy;
14619 if (UseVector) {
14620 unsigned Elts = NumStores * NumMemElts;
14621 // Get the type for the merged vector store.
14622 StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
14623 } else
14624 StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
14626 SDValue StoredVal;
14627 if (UseVector) {
14628 if (IsConstantSrc) {
14629 SmallVector<SDValue, 8> BuildVector;
14630 for (unsigned I = 0; I != NumStores; ++I) {
14631 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
14632 SDValue Val = St->getValue();
14633 // If constant is of the wrong type, convert it now.
14634 if (MemVT != Val.getValueType()) {
14635 Val = peekThroughBitcasts(Val);
14636 // Deal with constants of wrong size.
14637 if (ElementSizeBits != Val.getValueSizeInBits()) {
14638 EVT IntMemVT =
14639 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
14640 if (isa<ConstantFPSDNode>(Val)) {
14641 // Not clear how to truncate FP values.
14642 return false;
14643 } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
14644 Val = DAG.getConstant(C->getAPIntValue()
14645 .zextOrTrunc(Val.getValueSizeInBits())
14646 .zextOrTrunc(ElementSizeBits),
14647 SDLoc(C), IntMemVT);
14649 // Make sure the correctly sized value is bitcast to the memory type.
14650 Val = DAG.getBitcast(MemVT, Val);
14652 BuildVector.push_back(Val);
14654 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14655 : ISD::BUILD_VECTOR,
14656 DL, StoreTy, BuildVector);
14657 } else {
14658 SmallVector<SDValue, 8> Ops;
14659 for (unsigned i = 0; i < NumStores; ++i) {
14660 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
14661 SDValue Val = peekThroughBitcasts(St->getValue());
14662 // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
14663 // type MemVT. If the underlying value is not the correct
14664 // type, but it is an extraction of an appropriate vector we
14665 // can recast Val to be of the correct type. This may require
14666 // converting between EXTRACT_VECTOR_ELT and
14667 // EXTRACT_SUBVECTOR.
14668 if ((MemVT != Val.getValueType()) &&
14669 (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14670 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
14671 EVT MemVTScalarTy = MemVT.getScalarType();
14672 // We may need to add a bitcast here to get types to line up.
14673 if (MemVTScalarTy != Val.getValueType().getScalarType()) {
14674 Val = DAG.getBitcast(MemVT, Val);
14675 } else {
14676 unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
14677 : ISD::EXTRACT_VECTOR_ELT;
14678 SDValue Vec = Val.getOperand(0);
14679 SDValue Idx = Val.getOperand(1);
14680 Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
14683 Ops.push_back(Val);
14686 // Build the extracted vector elements back into a vector.
14687 StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
14688 : ISD::BUILD_VECTOR,
14689 DL, StoreTy, Ops);
14691 } else {
14692 // We should always use a vector store when merging extracted vector
14693 // elements, so this path implies a store of constants.
14694 assert(IsConstantSrc && "Merged vector elements should use vector store");
14696 APInt StoreInt(SizeInBits, 0);
14698 // Construct a single integer constant which is made of the smaller
14699 // constant inputs.
14700 bool IsLE = DAG.getDataLayout().isLittleEndian();
14701 for (unsigned i = 0; i < NumStores; ++i) {
14702 unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
14703 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
14705 SDValue Val = St->getValue();
14706 Val = peekThroughBitcasts(Val);
14707 StoreInt <<= ElementSizeBits;
14708 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
14709 StoreInt |= C->getAPIntValue()
14710 .zextOrTrunc(ElementSizeBits)
14711 .zextOrTrunc(SizeInBits);
14712 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
14713 StoreInt |= C->getValueAPF()
14714 .bitcastToAPInt()
14715 .zextOrTrunc(ElementSizeBits)
14716 .zextOrTrunc(SizeInBits);
14717 // If fp truncation is necessary give up for now.
14718 if (MemVT.getSizeInBits() != ElementSizeBits)
14719 return false;
14720 } else {
14721 llvm_unreachable("Invalid constant element type");
14725 // Create the new Load and Store operations.
14726 StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
14729 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
14730 SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
14732 // Make sure we use a trunc store if it's necessary to be legal.
14733 SDValue NewStore;
14734 if (!UseTrunc) {
14735 NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
14736 FirstInChain->getPointerInfo(),
14737 FirstInChain->getAlignment());
14738 } else { // Must be realized as a trunc store
14739 EVT LegalizedStoredValTy =
14740 TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
14741 unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
14742 ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
14743 SDValue ExtendedStoreVal =
14744 DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
14745 LegalizedStoredValTy);
14746 NewStore = DAG.getTruncStore(
14747 NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
14748 FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
14749 FirstInChain->getAlignment(),
14750 FirstInChain->getMemOperand()->getFlags());
14753 // Replace all merged stores with the new store.
14754 for (unsigned i = 0; i < NumStores; ++i)
14755 CombineTo(StoreNodes[i].MemNode, NewStore);
14757 AddToWorklist(NewChain.getNode());
14758 return true;
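// Illustrative merge (little-endian, values assumed): four consecutive
// i8 stores of 1, 2, 3, 4 at p .. p+3 become the single store
//   store i32 0x04030201, p
// when the i32 store is legal and fast for the target.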
14761 void DAGCombiner::getStoreMergeCandidates(
14762 StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
14763 SDNode *&RootNode) {
14764 // This holds the base pointer, index, and the offset in bytes from the base
14765 // pointer.
14766 BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
14767 EVT MemVT = St->getMemoryVT();
14769 SDValue Val = peekThroughBitcasts(St->getValue());
14770 // We must have a base and an offset.
14771 if (!BasePtr.getBase().getNode())
14772 return;
14774 // Do not handle stores to undef base pointers.
14775 if (BasePtr.getBase().isUndef())
14776 return;
14778 bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
14779 bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14780 Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14781 bool IsLoadSrc = isa<LoadSDNode>(Val);
14782 BaseIndexOffset LBasePtr;
14783 // Match on the load's base pointer, if relevant.
14784 EVT LoadVT;
14785 if (IsLoadSrc) {
14786 auto *Ld = cast<LoadSDNode>(Val);
14787 LBasePtr = BaseIndexOffset::match(Ld, DAG);
14788 LoadVT = Ld->getMemoryVT();
14789 // Load and store should be the same type.
14790 if (MemVT != LoadVT)
14791 return;
14792 // Loads must only have one use.
14793 if (!Ld->hasNUsesOfValue(1, 0))
14794 return;
14795 // The memory operands must not be volatile.
14796 if (Ld->isVolatile() || Ld->isIndexed())
14797 return;
14799 auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
14800 int64_t &Offset) -> bool {
14801 if (Other->isVolatile() || Other->isIndexed())
14802 return false;
14803 SDValue Val = peekThroughBitcasts(Other->getValue());
14804 // Allow merging constants of different types as integers.
14805 bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
14806 : Other->getMemoryVT() != MemVT;
14807 if (IsLoadSrc) {
14808 if (NoTypeMatch)
14809 return false;
14810 // The Load's Base Ptr must also match
14811 if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
14812 auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
14813 if (LoadVT != OtherLd->getMemoryVT())
14814 return false;
14815 // Loads must only have one use.
14816 if (!OtherLd->hasNUsesOfValue(1, 0))
14817 return false;
14818 // The memory operands must not be volatile.
14819 if (OtherLd->isVolatile() || OtherLd->isIndexed())
14820 return false;
14821 if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
14822 return false;
14823 } else
14824 return false;
14826 if (IsConstantSrc) {
14827 if (NoTypeMatch)
14828 return false;
14829 if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
14830 return false;
14832 if (IsExtractVecSrc) {
14833 // Do not merge truncated stores here.
14834 if (Other->isTruncatingStore())
14835 return false;
14836 if (!MemVT.bitsEq(Val.getValueType()))
14837 return false;
14838 if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
14839 Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
14840 return false;
14842 Ptr = BaseIndexOffset::match(Other, DAG);
14843 return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
14846 // We are looking for a root node which is an ancestor to all mergeable
14847 // stores. We search up through a load, to our root and then down
14848 // through all children. For instance we will find Store{1,2,3} if
14849 // St is Store1, Store2, or Store3 where the root is not a load,
14850 // which is always true for nonvolatile ops. TODO: Expand
14851 // the search to find all valid candidates through multiple layers of loads.
14853 // Root
14854 // |-------|-------|
14855 // Load Load Store3
14856 // | |
14857 // Store1 Store2
14859 // FIXME: We should be able to climb and
14860 // descend TokenFactors to find candidates as well.
14862 RootNode = St->getChain().getNode();
14864 unsigned NumNodesExplored = 0;
14865 if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
14866 RootNode = Ldn->getChain().getNode();
14867 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14868 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14869 if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
14870 for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
14871 if (I2.getOperandNo() == 0)
14872 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
14873 BaseIndexOffset Ptr;
14874 int64_t PtrDiff;
14875 if (CandidateMatch(OtherST, Ptr, PtrDiff))
14876 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14878 } else
14879 for (auto I = RootNode->use_begin(), E = RootNode->use_end();
14880 I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
14881 if (I.getOperandNo() == 0)
14882 if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
14883 BaseIndexOffset Ptr;
14884 int64_t PtrDiff;
14885 if (CandidateMatch(OtherST, Ptr, PtrDiff))
14886 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
14890 // We need to check that merging these stores does not cause a loop in
14891 // the DAG. Any store candidate may depend on another candidate
14892 // indirectly through its operand (we already consider dependencies
14893 // through the chain). Check in parallel by searching up from
14894 // non-chain operands of candidates.
14895 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
14896 SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
14897 SDNode *RootNode) {
14898 // FIXME: We should be able to truncate a full search of
14899 // predecessors by doing a BFS and keeping tabs on the originating
14900 // stores from which worklist nodes come, in a similar way to
14901 // TokenFactor simplification.
14903 SmallPtrSet<const SDNode *, 32> Visited;
14904 SmallVector<const SDNode *, 8> Worklist;
14906 // RootNode is a predecessor to all candidates so we need not search
14907 // past it. Add RootNode (peeking through TokenFactors). Do not count
14908 // these towards size check.
14910 Worklist.push_back(RootNode);
14911 while (!Worklist.empty()) {
14912 auto N = Worklist.pop_back_val();
14913 if (!Visited.insert(N).second)
14914 continue; // Already present in Visited.
14915 if (N->getOpcode() == ISD::TokenFactor) {
14916 for (SDValue Op : N->ops())
14917 Worklist.push_back(Op.getNode());
14921 // Don't count pruning nodes towards max.
14922 unsigned int Max = 1024 + Visited.size();
14923 // Search Ops of store candidates.
14924 for (unsigned i = 0; i < NumStores; ++i) {
14925 SDNode *N = StoreNodes[i].MemNode;
14926 // Of the 4 Store Operands:
14927 // * Chain (Op 0) -> We have already considered these
14928 // in candidate selection and can be
14929 // safely ignored
14930 // * Value (Op 1) -> Cycles may happen (e.g. through load chains)
14931 // * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
14932 // but aren't necessarily from the same base node, so
14933 // cycles possible (e.g. via indexed store).
14934 // * (Op 3) -> Represents the pre or post-indexing offset (or undef for
14935 // non-indexed stores). Not constant on all targets (e.g. ARM)
14936 // and so can participate in a cycle.
14937 for (unsigned j = 1; j < N->getNumOperands(); ++j)
14938 Worklist.push_back(N->getOperand(j).getNode());
14940 // Search through DAG. We can stop early if we find a store node.
14941 for (unsigned i = 0; i < NumStores; ++i)
14942 if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
14943 Max))
14944 return false;
14945 return true;
14948 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
14949 if (OptLevel == CodeGenOpt::None)
14950 return false;
14952 EVT MemVT = St->getMemoryVT();
14953 int64_t ElementSizeBytes = MemVT.getStoreSize();
14954 unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
14956 if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
14957 return false;
14959 bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
14960 Attribute::NoImplicitFloat);
14962 // This function cannot currently deal with non-byte-sized memory sizes.
14963 if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
14964 return false;
14966 if (!MemVT.isSimple())
14967 return false;
14969 // Perform an early exit check. Do not bother looking at stored values that
14970 // are not constants, loads, or extracted vector elements.
14971 SDValue StoredVal = peekThroughBitcasts(St->getValue());
14972 bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
14973 bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
14974 isa<ConstantFPSDNode>(StoredVal);
14975 bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
14976 StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
14978 if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
14979 return false;
14981 SmallVector<MemOpLink, 8> StoreNodes;
14982 SDNode *RootNode;
14983 // Find potential store merge candidates by searching through chain sub-DAG
14984 getStoreMergeCandidates(St, StoreNodes, RootNode);
14986 // Check if there is anything to merge.
14987 if (StoreNodes.size() < 2)
14988 return false;
14990 // Sort the memory operands according to their distance from the
14991 // base pointer.
14992 llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
14993 return LHS.OffsetFromBase < RHS.OffsetFromBase;
14996 // Store Merge attempts to merge the lowest stores first. This generally
14997 // works out, because if a merge succeeds the remaining stores are
14998 // checked after the first collection of stores is merged. However, in the
14999 // case that a non-mergeable store is found first, e.g., {p[-2],
15000 // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15001 // mergeable cases. To prevent this, we prune such stores from the
15002 // front of StoreNodes here.
15004 bool RV = false;
15005 while (StoreNodes.size() > 1) {
15006 unsigned StartIdx = 0;
15007 while ((StartIdx + 1 < StoreNodes.size()) &&
15008 StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15009 StoreNodes[StartIdx + 1].OffsetFromBase)
15010 ++StartIdx;
15012 // Bail if we don't have enough candidates to merge.
15013 if (StartIdx + 1 >= StoreNodes.size())
15014 return RV;
15016 if (StartIdx)
15017 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15019 // Scan the memory operations on the chain and find the first
15020 // non-consecutive store memory address.
15021 unsigned NumConsecutiveStores = 1;
15022 int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15023 // Check that the addresses are consecutive starting from the second
15024 // element in the list of stores.
15025 for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15026 int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15027 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15028 break;
15029 NumConsecutiveStores = i + 1;
15032 if (NumConsecutiveStores < 2) {
15033 StoreNodes.erase(StoreNodes.begin(),
15034 StoreNodes.begin() + NumConsecutiveStores);
15035 continue;
15038 // The node with the lowest store address.
15039 LLVMContext &Context = *DAG.getContext();
15040 const DataLayout &DL = DAG.getDataLayout();
15042 // Store the constants into memory as one consecutive store.
15043 if (IsConstantSrc) {
15044 while (NumConsecutiveStores >= 2) {
15045 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15046 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15047 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15048 unsigned LastLegalType = 1;
15049 unsigned LastLegalVectorType = 1;
15050 bool LastIntegerTrunc = false;
15051 bool NonZero = false;
15052 unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15053 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15054 StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15055 SDValue StoredVal = ST->getValue();
15056 bool IsElementZero = false;
15057 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15058 IsElementZero = C->isNullValue();
15059 else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15060 IsElementZero = C->getConstantFPValue()->isNullValue();
15061 if (IsElementZero) {
15062 if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15063 FirstZeroAfterNonZero = i;
15064 }
15065 NonZero |= !IsElementZero;
15067 // Find a legal type for the constant store.
15068 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15069 EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15070 bool IsFast = false;
15072 // Break early when size is too large to be legal.
15073 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15074 break;
15076 if (TLI.isTypeLegal(StoreTy) &&
15077 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15078 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15079 FirstStoreAlign, &IsFast) &&
15080 IsFast) {
15081 LastIntegerTrunc = false;
15082 LastLegalType = i + 1;
15083 // Or check whether a truncstore is legal.
15084 } else if (TLI.getTypeAction(Context, StoreTy) ==
15085 TargetLowering::TypePromoteInteger) {
15086 EVT LegalizedStoredValTy =
15087 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15088 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15089 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15090 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15091 FirstStoreAlign, &IsFast) &&
15092 IsFast) {
15093 LastIntegerTrunc = true;
15094 LastLegalType = i + 1;
15095 }
15096 }
15098 // We only use vectors if the constant is known to be zero or the
15099 // target allows it and the function is not marked with the
15100 // noimplicitfloat attribute.
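// Illustrative case (assuming v4f32 is legal and the target reports the
// vector store as cheap): four consecutive 'store float 0.0' operations
// can become a single 'store <4 x float> zeroinitializer'.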
15101 if ((!NonZero ||
15102 TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15103 !NoVectors) {
15104 // Find a legal type for the vector store.
15105 unsigned Elts = (i + 1) * NumMemElts;
15106 EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15107 if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15108 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15109 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15110 FirstStoreAlign, &IsFast) &&
15111 IsFast)
15112 LastLegalVectorType = i + 1;
15113 }
15114 }
15116 bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15117 unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15119 // Check if we found a legal integer type that creates a meaningful
15120 // merge.
15121 if (NumElem < 2) {
15122 // We know that candidate stores are in order and of correct
15123 // shape. While there is no mergeable sequence from the
15124 // beginning one may start later in the sequence. The only
15125 // reason a merge of size N could have failed where another of
15126 // the same size would not have, is if the alignment has
15127 // improved or we've dropped a non-zero value. Drop as many
15128 // candidates as we can here.
15129 unsigned NumSkip = 1;
15130 while (
15131 (NumSkip < NumConsecutiveStores) &&
15132 (NumSkip < FirstZeroAfterNonZero) &&
15133 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15134 NumSkip++;
15136 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15137 NumConsecutiveStores -= NumSkip;
15138 continue;
15139 }
15141 // Check that we can merge these candidates without causing a cycle.
15142 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15143 RootNode)) {
15144 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15145 NumConsecutiveStores -= NumElem;
15146 continue;
15147 }
15149 RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15150 UseVector, LastIntegerTrunc);
15152 // Remove merged stores for next iteration.
15153 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15154 NumConsecutiveStores -= NumElem;
15155 }
15156 continue;
15157 }
15159 // When extracting multiple vector elements, try to store them
15160 // in one vector store rather than a sequence of scalar stores.
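// Sketch (assuming v2i64 is legal here): two scalar stores of
// extract_vector_elt lanes 0 and 1 of the same v2i64 source can become
// one v2i64 store built from the extracted elements.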
15161 if (IsExtractVecSrc) {
15162 // Loop over the consecutive stores while merging succeeds.
15163 while (NumConsecutiveStores >= 2) {
15164 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15165 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15166 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15167 unsigned NumStoresToMerge = 1;
15168 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15169 // Find a legal type for the vector store.
15170 unsigned Elts = (i + 1) * NumMemElts;
15171 EVT Ty =
15172 EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15173 bool IsFast;
15175 // Break early when size is too large to be legal.
15176 if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15177 break;
15179 if (TLI.isTypeLegal(Ty) &&
15180 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15181 TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
15182 FirstStoreAlign, &IsFast) &&
15183 IsFast)
15184 NumStoresToMerge = i + 1;
15185 }
15187 // Check if we found a legal vector type that creates a meaningful
15188 // merge.
15189 if (NumStoresToMerge < 2) {
15190 // We know that candidate stores are in order and of correct
15191 // shape. While there is no mergeable sequence from the
15192 // beginning one may start later in the sequence. The only
15193 // reason a merge of size N could have failed where another of
15194 // the same size would not have, is if the alignment has
15195 // improved. Drop as many candidates as we can here.
15196 unsigned NumSkip = 1;
15197 while (
15198 (NumSkip < NumConsecutiveStores) &&
15199 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15200 NumSkip++;
15202 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15203 NumConsecutiveStores -= NumSkip;
15204 continue;
15205 }
15207 // Check that we can merge these candidates without causing a cycle.
15208 if (!checkMergeStoreCandidatesForDependencies(
15209 StoreNodes, NumStoresToMerge, RootNode)) {
15210 StoreNodes.erase(StoreNodes.begin(),
15211 StoreNodes.begin() + NumStoresToMerge);
15212 NumConsecutiveStores -= NumStoresToMerge;
15213 continue;
15214 }
15216 RV |= MergeStoresOfConstantsOrVecElts(
15217 StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15219 StoreNodes.erase(StoreNodes.begin(),
15220 StoreNodes.begin() + NumStoresToMerge);
15221 NumConsecutiveStores -= NumStoresToMerge;
15222 }
15223 continue;
15224 }
15226 // Below we handle the case of multiple consecutive stores that
15227 // come from multiple consecutive loads. We merge them into a single
15228 // wide load and a single wide store.
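// Sketch of the rewrite (assuming i64 is legal and fast for both the
// load and the store):
//   a = load i32, p;  b = load i32, p+4
//   store i32 a, q;   store i32 b, q+4
//     --> w = load i64, p;  store i64 w, q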
15230 // Look for load nodes which are used by the stored values.
15231 SmallVector<MemOpLink, 8> LoadNodes;
15233 // Find acceptable loads. Loads must share the same chain (token factor),
15234 // must not be zext, volatile, or indexed, and they must be consecutive.
15235 BaseIndexOffset LdBasePtr;
15237 for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15238 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15239 SDValue Val = peekThroughBitcasts(St->getValue());
15240 LoadSDNode *Ld = cast<LoadSDNode>(Val);
15242 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15243 // If this is not the first ptr that we check.
15244 int64_t LdOffset = 0;
15245 if (LdBasePtr.getBase().getNode()) {
15246 // The base ptr must be the same.
15247 if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15248 break;
15249 } else {
15250 // Check that all other base pointers are the same as this one.
15251 LdBasePtr = LdPtr;
15252 }
15254 // We found a potential memory operand to merge.
15255 LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15256 }
15258 while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15259 // If we have load/store pair instructions and we only have two values,
15260 // don't bother merging.
15261 unsigned RequiredAlignment;
15262 if (LoadNodes.size() == 2 &&
15263 TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15264 StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15265 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15266 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15267 break;
15268 }
15269 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15270 unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15271 unsigned FirstStoreAlign = FirstInChain->getAlignment();
15272 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15273 unsigned FirstLoadAS = FirstLoad->getAddressSpace();
15274 unsigned FirstLoadAlign = FirstLoad->getAlignment();
15276 // Scan the memory operations on the chain and find the first
15277 // non-consecutive load memory address. These variables hold the index
15278 // into the load node array.
15280 unsigned LastConsecutiveLoad = 1;
15282 // These variables refer to sizes, not indices, in the array.
15283 unsigned LastLegalVectorType = 1;
15284 unsigned LastLegalIntegerType = 1;
15285 bool isDereferenceable = true;
15286 bool DoIntegerTruncate = false;
15287 StartAddress = LoadNodes[0].OffsetFromBase;
15288 SDValue FirstChain = FirstLoad->getChain();
15289 for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15290 // All loads must share the same chain.
15291 if (LoadNodes[i].MemNode->getChain() != FirstChain)
15292 break;
15294 int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15295 if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15296 break;
15297 LastConsecutiveLoad = i;
15299 if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15300 isDereferenceable = false;
15302 // Find a legal type for the vector store.
15303 unsigned Elts = (i + 1) * NumMemElts;
15304 EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15306 // Break early when size is too large to be legal.
15307 if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15308 break;
15310 bool IsFastSt, IsFastLd;
15311 if (TLI.isTypeLegal(StoreTy) &&
15312 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15313 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15314 FirstStoreAlign, &IsFastSt) &&
15315 IsFastSt &&
15316 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15317 FirstLoadAlign, &IsFastLd) &&
15318 IsFastLd) {
15319 LastLegalVectorType = i + 1;
15320 }
15322 // Find a legal type for the integer store.
15323 unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15324 StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15325 if (TLI.isTypeLegal(StoreTy) &&
15326 TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15327 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15328 FirstStoreAlign, &IsFastSt) &&
15329 IsFastSt &&
15330 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15331 FirstLoadAlign, &IsFastLd) &&
15332 IsFastLd) {
15333 LastLegalIntegerType = i + 1;
15334 DoIntegerTruncate = false;
15335 // Or check whether a truncstore and extload is legal.
15336 } else if (TLI.getTypeAction(Context, StoreTy) ==
15337 TargetLowering::TypePromoteInteger) {
15338 EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15339 if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15340 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15341 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15342 StoreTy) &&
15343 TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15344 StoreTy) &&
15345 TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15346 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
15347 FirstStoreAlign, &IsFastSt) &&
15348 IsFastSt &&
15349 TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
15350 FirstLoadAlign, &IsFastLd) &&
15351 IsFastLd) {
15352 LastLegalIntegerType = i + 1;
15353 DoIntegerTruncate = true;
15354 }
15355 }
15356 }
15358 // Only use vector types if the vector type is larger than the integer
15359 // type. If they are the same, use integers.
15360 bool UseVectorTy =
15361 LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15362 unsigned LastLegalType =
15363 std::max(LastLegalVectorType, LastLegalIntegerType);
15365 // We add +1 here because the LastXXX variables are indices (locations
15366 // in the array) while NumElem is a count of elements.
15367 unsigned NumElem =
15368 std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15369 NumElem = std::min(LastLegalType, NumElem);
15371 if (NumElem < 2) {
15372 // We know that candidate stores are in order and of correct
15373 // shape. While there is no mergeable sequence from the
15374 // beginning one may start later in the sequence. The only
15375 // reason a merge of size N could have failed where another of
15376 // the same size would not have is if the alignment or either
15377 // the load or store has improved. Drop as many candidates as we
15378 // can here.
15379 unsigned NumSkip = 1;
15380 while ((NumSkip < LoadNodes.size()) &&
15381 (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15382 (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15383 NumSkip++;
15384 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15385 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15386 NumConsecutiveStores -= NumSkip;
15387 continue;
15388 }
15390 // Check that we can merge these candidates without causing a cycle.
15391 if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15392 RootNode)) {
15393 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15394 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15395 NumConsecutiveStores -= NumElem;
15396 continue;
15397 }
15399 // Find if it is better to use vectors or integers to load and store
15400 // to memory.
15401 EVT JointMemOpVT;
15402 if (UseVectorTy) {
15403 // Find a legal type for the vector store.
15404 unsigned Elts = NumElem * NumMemElts;
15405 JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15406 } else {
15407 unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15408 JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15409 }
15411 SDLoc LoadDL(LoadNodes[0].MemNode);
15412 SDLoc StoreDL(StoreNodes[0].MemNode);
15414 // The merged loads are required to have the same incoming chain, so
15415 // using the first's chain is acceptable.
15417 SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15418 AddToWorklist(NewStoreChain.getNode());
15420 MachineMemOperand::Flags MMOFlags =
15421 isDereferenceable ? MachineMemOperand::MODereferenceable
15422 : MachineMemOperand::MONone;
15424 SDValue NewLoad, NewStore;
15425 if (UseVectorTy || !DoIntegerTruncate) {
15426 NewLoad =
15427 DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15428 FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15429 FirstLoadAlign, MMOFlags);
15430 NewStore = DAG.getStore(
15431 NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15432 FirstInChain->getPointerInfo(), FirstStoreAlign);
15433 } else { // This must be the truncstore/extload case
15434 EVT ExtendedTy =
15435 TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15436 NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15437 FirstLoad->getChain(), FirstLoad->getBasePtr(),
15438 FirstLoad->getPointerInfo(), JointMemOpVT,
15439 FirstLoadAlign, MMOFlags);
15440 NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15441 FirstInChain->getBasePtr(),
15442 FirstInChain->getPointerInfo(),
15443 JointMemOpVT, FirstInChain->getAlignment(),
15444 FirstInChain->getMemOperand()->getFlags());
15445 }
15447 // Transfer chain users from old loads to the new load.
15448 for (unsigned i = 0; i < NumElem; ++i) {
15449 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15450 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15451 SDValue(NewLoad.getNode(), 1));
15452 }
15454 // Replace all the stores with the new store. Recursively remove the
15455 // corresponding values if they are no longer used.
15456 for (unsigned i = 0; i < NumElem; ++i) {
15457 SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15458 CombineTo(StoreNodes[i].MemNode, NewStore);
15459 if (Val.getNode()->use_empty())
15460 recursivelyDeleteUnusedNodes(Val.getNode());
15461 }
15463 RV = true;
15464 StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15465 LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15466 NumConsecutiveStores -= NumElem;
15467 }
15468 }
15469 return RV;
15470 }
15472 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15473 SDLoc SL(ST);
15474 SDValue ReplStore;
15476 // Replace the chain to avoid dependency.
15477 if (ST->isTruncatingStore()) {
15478 ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15479 ST->getBasePtr(), ST->getMemoryVT(),
15480 ST->getMemOperand());
15481 } else {
15482 ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15483 ST->getMemOperand());
15484 }
15486 // Create token to keep both nodes around.
15487 SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
15488 MVT::Other, ST->getChain(), ReplStore);
15490 // Make sure the new and old chains are cleaned up.
15491 AddToWorklist(Token.getNode());
15493 // Don't add users to work list.
15494 return CombineTo(ST, Token, false);
15495 }
15497 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
15498 SDValue Value = ST->getValue();
15499 if (Value.getOpcode() == ISD::TargetConstantFP)
15500 return SDValue();
15502 SDLoc DL(ST);
15504 SDValue Chain = ST->getChain();
15505 SDValue Ptr = ST->getBasePtr();
15507 const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
15509 // NOTE: If the original store is volatile, this transform must not increase
15510 // the number of stores. For example, on x86-32 an f64 can be stored in one
15511 // processor operation but an i64 (which is not legal) requires two. So the
15512 // transform should not be done in this case.
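// The basic rewrite, shown on a small example:
//   store float 1.0, p  -->  store i32 0x3F800000, p
// (0x3F800000 is the IEEE-754 single-precision bit pattern of 1.0).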
15514 SDValue Tmp;
15515 switch (CFP->getSimpleValueType(0).SimpleTy) {
15516 default:
15517 llvm_unreachable("Unknown FP type");
15518 case MVT::f16: // We don't do this for these yet.
15519 case MVT::f80:
15520 case MVT::f128:
15521 case MVT::ppcf128:
15522 return SDValue();
15523 case MVT::f32:
15524 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
15525 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15527 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
15528 bitcastToAPInt().getZExtValue(), SDLoc(CFP),
15529 MVT::i32);
15530 return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
15531 }
15533 return SDValue();
15534 case MVT::f64:
15535 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
15536 !ST->isVolatile()) ||
15537 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
15539 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
15540 getZExtValue(), SDLoc(CFP), MVT::i64);
15541 return DAG.getStore(Chain, DL, Tmp,
15542 Ptr, ST->getMemOperand());
15543 }
15545 if (!ST->isVolatile() &&
15546 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
15547 // Many FP stores are not made apparent until after legalize, e.g. for
15548 // argument passing. Since this is so common, custom legalize the
15549 // 64-bit integer store into two 32-bit stores.
15550 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
15551 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
15552 SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
15553 if (DAG.getDataLayout().isBigEndian())
15554 std::swap(Lo, Hi);
15556 unsigned Alignment = ST->getAlignment();
15557 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15558 AAMDNodes AAInfo = ST->getAAInfo();
15560 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15561 ST->getAlignment(), MMOFlags, AAInfo);
15562 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15563 DAG.getConstant(4, DL, Ptr.getValueType()));
15564 Alignment = MinAlign(Alignment, 4U);
15565 SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
15566 ST->getPointerInfo().getWithOffset(4),
15567 Alignment, MMOFlags, AAInfo);
15568 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
15569 St0, St1);
15570 }
15572 return SDValue();
15573 }
15574 }
15576 SDValue DAGCombiner::visitSTORE(SDNode *N) {
15577 StoreSDNode *ST = cast<StoreSDNode>(N);
15578 SDValue Chain = ST->getChain();
15579 SDValue Value = ST->getValue();
15580 SDValue Ptr = ST->getBasePtr();
15582 // If this is a store of a bit convert, store the input value if the
15583 // resultant store does not need a higher alignment than the original.
15584 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
15585 ST->isUnindexed()) {
15586 EVT SVT = Value.getOperand(0).getValueType();
15587 // If the store is volatile, we only want to change the store type if the
15588 // resulting store is legal. Otherwise we might increase the number of
15589 // memory accesses. We don't care if the original type was legal or not
15590 // as we assume software couldn't rely on the number of accesses of an
15591 // illegal type.
15592 if (((!LegalOperations && !ST->isVolatile()) ||
15593 TLI.isOperationLegal(ISD::STORE, SVT)) &&
15594 TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
15595 unsigned OrigAlign = ST->getAlignment();
15596 bool Fast = false;
15597 if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
15598 ST->getAddressSpace(), OrigAlign, &Fast) &&
15599 Fast) {
15600 return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
15601 ST->getPointerInfo(), OrigAlign,
15602 ST->getMemOperand()->getFlags(), ST->getAAInfo());
15603 }
15604 }
15605 }
15607 // Turn 'store undef, Ptr' -> nothing.
15608 if (Value.isUndef() && ST->isUnindexed())
15609 return Chain;
15611 // Try to infer better alignment information than the store already has.
15612 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
15613 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
15614 if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
15615 SDValue NewStore =
15616 DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
15617 ST->getMemoryVT(), Align,
15618 ST->getMemOperand()->getFlags(), ST->getAAInfo());
15619 // NewStore will always be N, as we are only refining the alignment.
15620 assert(NewStore.getNode() == N);
15621 (void)NewStore;
15622 }
15623 }
15624 }
15626 // Try transforming a pair of floating-point load / store ops to
15627 // integer load / store ops.
15628 if (SDValue NewST = TransformFPLoadStorePair(N))
15629 return NewST;
15631 if (ST->isUnindexed()) {
15632 // Walk up chain skipping non-aliasing memory nodes, on this store and any
15633 // adjacent stores.
15634 if (findBetterNeighborChains(ST)) {
15635 // replaceStoreChain uses CombineTo, which handles all of the worklist
15636 // manipulation. Return the original node so nothing else is done.
15637 return SDValue(ST, 0);
15638 }
15639 Chain = ST->getChain();
15640 }
15642 // FIXME: is there such a thing as a truncating indexed store?
15643 if (ST->isTruncatingStore() && ST->isUnindexed() &&
15644 Value.getValueType().isInteger() &&
15645 (!isa<ConstantSDNode>(Value) ||
15646 !cast<ConstantSDNode>(Value)->isOpaque())) {
15647 APInt TruncDemandedBits =
15648 APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
15649 ST->getMemoryVT().getScalarSizeInBits());
15651 // See if we can simplify the input to this truncstore with knowledge that
15652 // only the low bits are being used. For example:
15653 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
15654 SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
15655 AddToWorklist(Value.getNode());
15656 if (Shorter)
15657 return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
15658 ST->getMemOperand());
15660 // Otherwise, see if we can simplify the operation with
15661 // SimplifyDemandedBits, which only works if the value has a single use.
15662 if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
15663 // Re-visit the store if anything changed and the store hasn't been
15664 // merged with another node (in which case N is deleted).
15665 // SimplifyDemandedBits will add Value's node back to the worklist if
15666 // necessary, but we also need to re-visit the Store node itself.
15667 if (N->getOpcode() != ISD::DELETED_NODE)
15668 AddToWorklist(N);
15669 return SDValue(N, 0);
15670 }
15671 }
15673 // If this is a load followed by a store to the same location, then the store
15674 // is dead/noop.
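// For example:
//   x = load i32, p
//   store i32 x, p   ; removable when nothing on the chain between the
//                    ; load and the store can write to p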
15675 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
15676 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
15677 ST->isUnindexed() && !ST->isVolatile() &&
15678 // There can't be any side effects between the load and store, such as
15679 // a call or store.
15680 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
15681 // The store is dead, remove it.
15682 return Chain;
15683 }
15684 }
15686 if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
15687 if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
15688 !ST1->isVolatile()) {
15689 if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
15690 ST->getMemoryVT() == ST1->getMemoryVT()) {
15691 // If this is a store followed by a store with the same value to the
15692 // same location, then the store is dead/noop.
15693 return Chain;
15694 }
15696 if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
15697 !ST1->getBasePtr().isUndef()) {
15698 const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
15699 const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
15700 unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
15701 unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
15702 // If the preceding store (ST1) writes to a subset of the current store's
15703 // location and no other node is chained to that store, the preceding
15704 // store is dead and we can effectively drop it. Do not remove stores to
15705 // undef as they may be used as data sinks.
15706 if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
15707 CombineTo(ST1, ST1->getChain());
15708 return SDValue();
15709 }
15711 // If ST stores to a subset of preceding store's write set, we may be
15712 // able to fold ST's value into the preceding stored value. As we know
15713 // the other uses of ST1's chain are unconcerned with ST, this folding
15714 // will not affect those nodes.
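// A little-endian illustration of this folding:
//   store i32 0x11223344, p;  store i8 0x55, p
//     --> store i32 0x11223355, p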
15715 int64_t BitOffset;
15716 if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
15717 BitOffset)) {
15718 SDValue ChainValue = ST1->getValue();
15719 if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
15720 if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
15721 APInt Val = C1->getAPIntValue();
15722 APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
15723 // FIXME: Handle Big-endian mode.
15724 if (!DAG.getDataLayout().isBigEndian()) {
15725 Val.insertBits(InsertVal, BitOffset);
15726 SDValue NewSDVal =
15727 DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
15728 C1->isTargetOpcode(), C1->isOpaque());
15729 SDNode *NewST1 = DAG.UpdateNodeOperands(
15730 ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
15731 ST1->getOperand(3));
15732 return CombineTo(ST, SDValue(NewST1, 0));
15733 }
15734 }
15735 }
15736 } // End ST subset of ST1 case.
15737 }
15738 }
15739 }
15741 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
15742 // truncating store. We can do this even if this is already a truncstore.
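// For instance:
//   store (i16 (trunc i32 X)), p  -->  truncstore i32 X, p, i16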
15743 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
15744 && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
15745 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
15746 ST->getMemoryVT())) {
15747 return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
15748 Ptr, ST->getMemoryVT(), ST->getMemOperand());
15749 }
15751 // Always perform this optimization before types are legal. If the target
15752 // prefers, also try this after legalization to catch stores that were created
15753 // by intrinsics or other nodes.
15754 if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
15755 while (true) {
15756 // There can be multiple store sequences on the same chain.
15757 // Keep trying to merge store sequences until we are unable to do so
15758 // or until we merge the last store on the chain.
15759 bool Changed = MergeConsecutiveStores(ST);
15760 if (!Changed) break;
15761 // Return N as merge only uses CombineTo and no worklist clean
15762 // up is necessary.
15763 if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
15764 return SDValue(N, 0);
15765 }
15766 }
15768 // Try transforming N to an indexed store.
15769 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
15770 return SDValue(N, 0);
15772 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
15774 // Make sure to do this only after attempting to merge stores in order to
15775 // avoid changing the types of some subset of stores due to visit order,
15776 // preventing their merging.
15777 if (isa<ConstantFPSDNode>(ST->getValue())) {
15778 if (SDValue NewSt = replaceStoreOfFPConstant(ST))
15779 return NewSt;
15780 }
15782 if (SDValue NewSt = splitMergedValStore(ST))
15783 return NewSt;
15785 return ReduceLoadOpStoreWidth(N);
15786 }
15788 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
15789 const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
15790 if (!LifetimeEnd->hasOffset())
15791 return SDValue();
15793 const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
15794 LifetimeEnd->getOffset(), false);
15796 // We walk up the chains to find stores.
15797 SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
15798 while (!Chains.empty()) {
15799 SDValue Chain = Chains.back();
15800 Chains.pop_back();
15801 if (!Chain.hasOneUse())
15802 continue;
15803 switch (Chain.getOpcode()) {
15804 case ISD::TokenFactor:
15805 for (unsigned Nops = Chain.getNumOperands(); Nops;)
15806 Chains.push_back(Chain.getOperand(--Nops));
15807 break;
15808 case ISD::LIFETIME_START:
15809 case ISD::LIFETIME_END:
15810 // We can forward past any lifetime start/end that can be proven not to
15811 // alias the node.
15812 if (!isAlias(Chain.getNode(), N))
15813 Chains.push_back(Chain.getOperand(0));
15814 break;
15815 case ISD::STORE: {
15816 StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
15817 if (ST->isVolatile() || ST->isIndexed())
15818 continue;
15819 const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
15820 // If we store purely within object bounds just before its lifetime ends,
15821 // we can remove the store.
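// e.g., a store that only initializes %p immediately before a
// LIFETIME_END of %p can never be observed, so it is dead.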
15822 if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
15823 ST->getMemoryVT().getStoreSizeInBits())) {
15824 LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
15825 dbgs() << "\nwithin LIFETIME_END of : ";
15826 LifetimeEndBase.dump(); dbgs() << "\n");
15827 CombineTo(ST, ST->getChain());
15828 return SDValue(N, 0);
15829 }
15830 }
15831 }
15832 }
15833 return SDValue();
15834 }
15836 /// For the store instruction sequence below, the F and I values
15837 /// are bundled together as an i64 value before being stored into memory.
15838 /// Sometimes it is more efficient to generate separate stores for F and I,
15839 /// which can remove the bitwise instructions or sink them to colder places.
15841 /// (store (or (zext (bitcast F to i32) to i64),
15842 /// (shl (zext I to i64), 32)), addr) -->
15843 /// (store F, addr) and (store I, addr+4)
15845 /// Similarly, splitting other merged stores can also be beneficial, e.g.:
15846 /// For pair of {i32, i32}, i64 store --> two i32 stores.
15847 /// For pair of {i32, i16}, i64 store --> two i32 stores.
15848 /// For pair of {i16, i16}, i32 store --> two i16 stores.
15849 /// For pair of {i16, i8}, i32 store --> two i16 stores.
15850 /// For pair of {i8, i8}, i16 store --> two i8 stores.
15852 /// We allow each target to determine specifically which kind of splitting is
15853 /// supported.
15855 /// The store patterns are commonly seen in the simple code snippet below
15856 /// when only std::make_pair(...) is SROA-transformed before being inlined into hoo().
15857 /// void goo(const std::pair<int, float> &);
15858 /// hoo() {
15859 /// ...
15860 /// goo(std::make_pair(tmp, ftmp));
15861 /// ...
15862 /// }
15864 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
15865 if (OptLevel == CodeGenOpt::None)
15866 return SDValue();
15868 SDValue Val = ST->getValue();
15869 SDLoc DL(ST);
15871 // Match OR operand.
15872 if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
15873 return SDValue();
15875 // Match SHL operand and get Lower and Higher parts of Val.
15876 SDValue Op1 = Val.getOperand(0);
15877 SDValue Op2 = Val.getOperand(1);
15878 SDValue Lo, Hi;
15879 if (Op1.getOpcode() != ISD::SHL) {
15880 std::swap(Op1, Op2);
15881 if (Op1.getOpcode() != ISD::SHL)
15882 return SDValue();
15883 }
15884 Lo = Op2;
15885 Hi = Op1.getOperand(0);
15886 if (!Op1.hasOneUse())
15887 return SDValue();
15889 // Match shift amount to HalfValBitSize.
15890 unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
15891 ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
15892 if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
15893 return SDValue();
15895 // Lo and Hi are zero-extended from integers whose size is at most
15896 // HalfValBitSize (e.g., from i32 to i64).
15897 if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
15898 !Lo.getOperand(0).getValueType().isScalarInteger() ||
15899 Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
15900 Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
15901 !Hi.getOperand(0).getValueType().isScalarInteger() ||
15902 Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
15903 return SDValue();
15905 // Use the EVT of low and high parts before bitcast as the input
15906 // of target query.
15907 EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
15908 ? Lo.getOperand(0).getValueType()
15909 : Lo.getValueType();
15910 EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
15911 ? Hi.getOperand(0).getValueType()
15912 : Hi.getValueType();
15913 if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
15914 return SDValue();
15916 // Start to split store.
15917 unsigned Alignment = ST->getAlignment();
15918 MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
15919 AAMDNodes AAInfo = ST->getAAInfo();
15921 // Change the sizes of Lo and Hi's value types to HalfValBitSize.
15922 EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
15923 Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
15924 Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
15926 SDValue Chain = ST->getChain();
15927 SDValue Ptr = ST->getBasePtr();
15928 // Lower value store.
15929 SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
15930 ST->getAlignment(), MMOFlags, AAInfo);
15931 Ptr =
15932 DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
15933 DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
15934 // Higher value store.
15935 SDValue St1 =
15936 DAG.getStore(St0, DL, Hi, Ptr,
15937 ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
15938 Alignment / 2, MMOFlags, AAInfo);
15939 return St1;
15940 }
15942 /// Convert a disguised subvector insertion into a shuffle:
15943 /// insert_vector_elt V, (bitcast X from vector type), IdxC -->
15944 /// bitcast(shuffle (bitcast V), (extended X), Mask)
15945 /// Note: We do not use an insert_subvector node because that requires a legal
15946 /// subvector type.
15947 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
15948 SDValue InsertVal = N->getOperand(1);
15949 if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
15950 !InsertVal.getOperand(0).getValueType().isVector())
15951 return SDValue();
15953 SDValue SubVec = InsertVal.getOperand(0);
15954 SDValue DestVec = N->getOperand(0);
15955 EVT SubVecVT = SubVec.getValueType();
15956 EVT VT = DestVec.getValueType();
15957 unsigned NumSrcElts = SubVecVT.getVectorNumElements();
15958 unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
15959 unsigned NumMaskVals = ExtendRatio * NumSrcElts;
15961 // Step 1: Create a shuffle mask that implements this insert operation. The
15962 // vector that we are inserting into will be operand 0 of the shuffle, so
15963 // those elements are just 'i'. The inserted subvector is in the first
15964 // positions of operand 1 of the shuffle. Example:
15965 // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
15966 SmallVector<int, 16> Mask(NumMaskVals);
15967 for (unsigned i = 0; i != NumMaskVals; ++i) {
15968 if (i / NumSrcElts == InsIndex)
15969 Mask[i] = (i % NumSrcElts) + NumMaskVals;
15970 else
15971 Mask[i] = i;
15972 }
15974 // Bail out if the target cannot handle the shuffle we want to create.
15975 EVT SubVecEltVT = SubVecVT.getVectorElementType();
15976 EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
15977 if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
15978 return SDValue();
15980 // Step 2: Create a wide vector from the inserted source vector by appending
15981 // undefined elements. This is the same size as our destination vector.
15982 SDLoc DL(N);
15983 SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
15984 ConcatOps[0] = SubVec;
15985 SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
15987 // Step 3: Shuffle in the padded subvector.
15988 SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
15989 SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
15990 AddToWorklist(PaddedSubV.getNode());
15991 AddToWorklist(DestVecBC.getNode());
15992 AddToWorklist(Shuf.getNode());
15993 return DAG.getBitcast(VT, Shuf);
15994 }
15996 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
15997 SDValue InVec = N->getOperand(0);
15998 SDValue InVal = N->getOperand(1);
15999 SDValue EltNo = N->getOperand(2);
16000 SDLoc DL(N);
16002 // If the inserted element is an UNDEF, just use the input vector.
16003 if (InVal.isUndef())
16004 return InVec;
16006 EVT VT = InVec.getValueType();
16007 unsigned NumElts = VT.getVectorNumElements();
16009 // Remove redundant insertions:
16010 // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16011 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16012 InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16013 return InVec;
16015 auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16016 if (!IndexC) {
16017 // If this is variable insert to undef vector, it might be better to splat:
16018 // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16019 if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16020 SmallVector<SDValue, 8> Ops(NumElts, InVal);
16021 return DAG.getBuildVector(VT, DL, Ops);
16022 }
16023 return SDValue();
16024 }
16026 // We must know which element is being inserted for folds below here.
16027 unsigned Elt = IndexC->getZExtValue();
16028 if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16029 return Shuf;
16031 // Canonicalize insert_vector_elt dag nodes.
16032 // Example:
16033 // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16034 // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16036 // Do this only if the child insert_vector node has one use; also
16037 // do this only if indices are both constants and Idx1 < Idx0.
16038 if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16039 && isa<ConstantSDNode>(InVec.getOperand(2))) {
16040 unsigned OtherElt = InVec.getConstantOperandVal(2);
16041 if (Elt < OtherElt) {
16042 // Swap nodes.
16043 SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16044 InVec.getOperand(0), InVal, EltNo);
16045 AddToWorklist(NewOp.getNode());
16046 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16047 VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16048 }
16049 }
16051 // If we can't generate a legal BUILD_VECTOR, exit
16052 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16053 return SDValue();
16055 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16056 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
16057 // vector elements.
16058 SmallVector<SDValue, 8> Ops;
16059 // Do not combine these two vectors if the output vector will not replace
16060 // the input vector.
16061 if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16062 Ops.append(InVec.getNode()->op_begin(),
16063 InVec.getNode()->op_end());
16064 } else if (InVec.isUndef()) {
16065 Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16066 } else {
16067 return SDValue();
16068 }
16069 assert(Ops.size() == NumElts && "Unexpected vector size");
16071 // Insert the element
16072 if (Elt < Ops.size()) {
16073 // All the operands of BUILD_VECTOR must have the same type;
16074 // we enforce that here.
16075 EVT OpVT = Ops[0].getValueType();
16076 Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16077 }
16079 // Return the new vector
16080 return DAG.getBuildVector(VT, DL, Ops);
16081 }
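/// Replace an extract of a vector element that is fed by a vector load
/// with a narrow scalar load of just that element, e.g. (illustrative):
///   (extractelt (load v4i32, $addr), 2) --> (load i32, $addr + 8)
/// provided the narrow load remains legal and sufficiently aligned.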
16083 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16084 SDValue EltNo,
16085 LoadSDNode *OriginalLoad) {
16086 assert(!OriginalLoad->isVolatile());
16088 EVT ResultVT = EVE->getValueType(0);
16089 EVT VecEltVT = InVecVT.getVectorElementType();
16090 unsigned Align = OriginalLoad->getAlignment();
16091 unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16092 VecEltVT.getTypeForEVT(*DAG.getContext()));
16094 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16095 return SDValue();
16097 ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16098 ISD::NON_EXTLOAD : ISD::EXTLOAD;
16099 if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16100 return SDValue();
16102 Align = NewAlign;
16104 SDValue NewPtr = OriginalLoad->getBasePtr();
16105 SDValue Offset;
16106 EVT PtrType = NewPtr.getValueType();
16107 MachinePointerInfo MPI;
16108 SDLoc DL(EVE);
16109 if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16110 int Elt = ConstEltNo->getZExtValue();
16111 unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16112 Offset = DAG.getConstant(PtrOff, DL, PtrType);
16113 MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16114 } else {
16115 Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16116 Offset = DAG.getNode(
16117 ISD::MUL, DL, PtrType, Offset,
16118 DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16119 // Discard the pointer info except the address space because the memory
16120 // operand can't represent this new access since the offset is variable.
16121 MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16122 }
16123 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16125 // The replacement we need to do here is a little tricky: we need to
16126 // replace an extractelement of a load with a load.
16127 // Use ReplaceAllUsesOfValuesWith to do the replacement.
16128 // Note that this replacement assumes that the extractelement is the only
16129 // use of the load; that's okay because we don't want to perform this
16130 // transformation in other cases anyway.
16131 SDValue Load;
16132 SDValue Chain;
16133 if (ResultVT.bitsGT(VecEltVT)) {
16134 // If the result type of vextract is wider than the load, then issue an
16135 // extending load instead.
16136 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16137 VecEltVT)
16138 ? ISD::ZEXTLOAD
16139 : ISD::EXTLOAD;
16140 Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16141 OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16142 Align, OriginalLoad->getMemOperand()->getFlags(),
16143 OriginalLoad->getAAInfo());
16144 Chain = Load.getValue(1);
16145 } else {
16146 Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16147 MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16148 OriginalLoad->getAAInfo());
16149 Chain = Load.getValue(1);
16150 if (ResultVT.bitsLT(VecEltVT))
16151 Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16152 else
16153 Load = DAG.getBitcast(ResultVT, Load);
16154 }
16155 WorklistRemover DeadNodes(*this);
16156 SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16157 SDValue To[] = { Load, Chain };
16158 DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16159 // Since we're explicitly calling ReplaceAllUses, add the new node to the
16160 // worklist explicitly as well.
16161 AddToWorklist(Load.getNode());
16162 AddUsersToWorklist(Load.getNode()); // Add users too
16163 // Make sure to revisit this node to clean it up; it will usually be dead.
16164 AddToWorklist(EVE);
16165 ++OpsNarrowed;
16166 return SDValue(EVE, 0);
16167 }
16169 /// Transform a vector binary operation into a scalar binary operation by moving
16170 /// the math/logic after an extract element of a vector.
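/// For example, when one operand is a constant build_vector (so the
/// extract of that side constant-folds away):
///   extractelt (add X, C), IndexC --> add (extractelt X, IndexC), C'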
16171 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16172 bool LegalOperations) {
16173 SDValue Vec = ExtElt->getOperand(0);
16174 SDValue Index = ExtElt->getOperand(1);
16175 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16176 if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
16177 return SDValue();
16179 // Targets may want to avoid this to prevent an expensive register transfer.
16180 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16181 if (!TLI.shouldScalarizeBinop(Vec))
16182 return SDValue();
16184 // Extracting an element of a vector constant is constant-folded, so this
16185 // transform is just replacing a vector op with a scalar op while moving the
16186 // extract.
16187 SDValue Op0 = Vec.getOperand(0);
16188 SDValue Op1 = Vec.getOperand(1);
16189 if (isAnyConstantBuildVector(Op0, true) ||
16190 isAnyConstantBuildVector(Op1, true)) {
16191 // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16192 // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16193 SDLoc DL(ExtElt);
16194 EVT VT = ExtElt->getValueType(0);
16195 SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16196 SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16197 return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16198 }
16200 return SDValue();
16201 }
16203 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16204 SDValue VecOp = N->getOperand(0);
16205 SDValue Index = N->getOperand(1);
16206 EVT ScalarVT = N->getValueType(0);
16207 EVT VecVT = VecOp.getValueType();
16208 if (VecOp.isUndef())
16209 return DAG.getUNDEF(ScalarVT);
16211 // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16213 // This only really matters if the index is non-constant since other combines
16214 // on the constant elements already work.
16215 SDLoc DL(N);
16216 if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16217 Index == VecOp.getOperand(2)) {
16218 SDValue Elt = VecOp.getOperand(1);
16219 return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16220 }
16222 // (vextract (scalar_to_vector val), 0) -> val
16223 if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16224 // Check if the result type doesn't match the inserted element type. A
16225 // SCALAR_TO_VECTOR may truncate the inserted element and the
16226 // EXTRACT_VECTOR_ELT may widen the extracted vector.
16227 SDValue InOp = VecOp.getOperand(0);
16228 if (InOp.getValueType() != ScalarVT) {
16229 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16230 return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16231 }
16232 return InOp;
16233 }
16235 // extract_vector_elt of out-of-bounds element -> UNDEF
16236 auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16237 unsigned NumElts = VecVT.getVectorNumElements();
16238 if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16239 return DAG.getUNDEF(ScalarVT);
16241 // extract_vector_elt (build_vector x, y), 1 -> y
16242 if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16243 TLI.isTypeLegal(VecVT) &&
16244 (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16245 SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16246 EVT InEltVT = Elt.getValueType();
16248 // Sometimes build_vector's scalar input types do not match result type.
16249 if (ScalarVT == InEltVT)
16250 return Elt;
16252 // TODO: It may be useful to truncate if free if the build_vector implicitly
16253 // converts.
16254 }
16256 // TODO: These transforms should not require the 'hasOneUse' restriction, but
16257 // there are regressions on multiple targets without it. We can end up with a
16258 // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16259 if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16260 VecOp.hasOneUse()) {
16261 // The vector index of the LSBs of the source depends on the endianness.
16262 bool IsLE = DAG.getDataLayout().isLittleEndian();
16263 unsigned ExtractIndex = IndexC->getZExtValue();
16264 // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16265 unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16266 SDValue BCSrc = VecOp.getOperand(0);
16267 if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16268 return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16270 if (LegalTypes && BCSrc.getValueType().isInteger() &&
16271 BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16272 // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16273 // trunc i64 X to i32
16274 SDValue X = BCSrc.getOperand(0);
16275 assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16276 "Extract element and scalar to vector can't change element type "
16277 "from FP to integer.");
16278 unsigned XBitWidth = X.getValueSizeInBits();
16279 unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16280 BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16282 // An extract element return value type can be wider than its vector
16283 // operand element type. In that case, the high bits are undefined, so
16284 // it's possible that we may need to extend rather than truncate.
16285 if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16286 assert(XBitWidth % VecEltBitWidth == 0 &&
16287 "Scalar bitwidth must be a multiple of vector element bitwidth");
16288 return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16289 }
16290 }
16291 }
16293 if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16294 return BO;
16296 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16297 // We only perform this optimization before the op legalization phase because
16298 // we may introduce new vector instructions which are not backed by TD
16299 // patterns. For example on AVX, extracting elements from a wide vector
16300 // without using extract_subvector. However, if we can find an underlying
16301 // scalar value, then we can always use that.
16302 if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16303 auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16304 // Find the new index to extract from.
16305 int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16307 // Extracting an undef index is undef.
16308 if (OrigElt == -1)
16309 return DAG.getUNDEF(ScalarVT);
16311 // Select the right vector half to extract from.
16312 SDValue SVInVec;
16313 if (OrigElt < (int)NumElts) {
16314 SVInVec = VecOp.getOperand(0);
16315 } else {
16316 SVInVec = VecOp.getOperand(1);
16317 OrigElt -= NumElts;
16318 }
16320 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16321 SDValue InOp = SVInVec.getOperand(OrigElt);
16322 if (InOp.getValueType() != ScalarVT) {
16323 assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16324 InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16325 }
16327 return InOp;
16328 }
16330 // FIXME: We should handle recursing on other vector shuffles and
16331 // scalar_to_vector here as well.
16333 if (!LegalOperations ||
16334 // FIXME: Should really be just isOperationLegalOrCustom.
16335 TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16336 TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16337 EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16338 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16339 DAG.getConstant(OrigElt, DL, IndexTy));
16340 }
16341 }
16343 // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16344 // simplify it based on the (valid) extraction indices.
16345 if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16346 return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16347 Use->getOperand(0) == VecOp &&
16348 isa<ConstantSDNode>(Use->getOperand(1));
16349 })) {
16350 APInt DemandedElts = APInt::getNullValue(NumElts);
16351 for (SDNode *Use : VecOp->uses()) {
16352 auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16353 if (CstElt->getAPIntValue().ult(NumElts))
16354 DemandedElts.setBit(CstElt->getZExtValue());
16355 }
16356 if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16357 // We simplified the vector operand of this extract element. If this
16358 // extract is not dead, visit it again so it is folded properly.
16359 if (N->getOpcode() != ISD::DELETED_NODE)
16360 AddToWorklist(N);
16361 return SDValue(N, 0);
16362 }
16363 }
16365 // Everything under here is trying to match an extract of a loaded value.
16366 // If the result of load has to be truncated, then it's not necessarily
16367 // profitable.
16368 bool BCNumEltsChanged = false;
16369 EVT ExtVT = VecVT.getVectorElementType();
16370 EVT LVT = ExtVT;
16371 if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16372 return SDValue();
16374 if (VecOp.getOpcode() == ISD::BITCAST) {
16375 // Don't duplicate a load with other uses.
16376 if (!VecOp.hasOneUse())
16377 return SDValue();
16379 EVT BCVT = VecOp.getOperand(0).getValueType();
16380 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16381 return SDValue();
16382 if (NumElts != BCVT.getVectorNumElements())
16383 BCNumEltsChanged = true;
16384 VecOp = VecOp.getOperand(0);
16385 ExtVT = BCVT.getVectorElementType();
16386 }
16388 // extract (vector load $addr), i --> load $addr + i * size
16389 if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16390 ISD::isNormalLoad(VecOp.getNode()) &&
16391 !Index->hasPredecessor(VecOp.getNode())) {
16392 auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16393 if (VecLoad && !VecLoad->isVolatile())
16394 return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16395 }
16397 // Perform only after legalization to ensure build_vector / vector_shuffle
16398 // optimizations have already been done.
16399 if (!LegalOperations || !IndexC)
16400 return SDValue();
16402 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16403 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16404 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16405 int Elt = IndexC->getZExtValue();
16406 LoadSDNode *LN0 = nullptr;
16407 if (ISD::isNormalLoad(VecOp.getNode())) {
16408 LN0 = cast<LoadSDNode>(VecOp);
16409 } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16410 VecOp.getOperand(0).getValueType() == ExtVT &&
16411 ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16412 // Don't duplicate a load with other uses.
16413 if (!VecOp.hasOneUse())
16414 return SDValue();
16416 LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16417 }
16418 if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16419 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16420 // =>
16421 // (load $addr+1*size)
16423 // Don't duplicate a load with other uses.
16424 if (!VecOp.hasOneUse())
16425 return SDValue();
16427 // If the bit convert changed the number of elements, it is unsafe
16428 // to examine the mask.
16429 if (BCNumEltsChanged)
16430 return SDValue();
16432 // Select the input vector, guarding against an out-of-range extract index.
16433 int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16434 VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16436 if (VecOp.getOpcode() == ISD::BITCAST) {
16437 // Don't duplicate a load with other uses.
16438 if (!VecOp.hasOneUse())
16439 return SDValue();
16441 VecOp = VecOp.getOperand(0);
16442 }
16443 if (ISD::isNormalLoad(VecOp.getNode())) {
16444 LN0 = cast<LoadSDNode>(VecOp);
16445 Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
16446 Index = DAG.getConstant(Elt, DL, Index.getValueType());
16447 }
16448 }
16450 // Make sure we found a non-volatile load and the extractelement is
16451 // the only use.
16452 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
16453 return SDValue();
16455 // If Idx was -1 above, Elt is going to be -1, so just return undef.
16456 if (Elt == -1)
16457 return DAG.getUNDEF(LVT);
16459 return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
16460 }
16462 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
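// Illustrative little-endian example (i8 elements zero-extended to i32):
//   (v2i32 build_vector (zext a), (zext b))
//     --> (v2i32 bitcast (v8i8 build_vector a, 0, 0, 0, b, 0, 0, 0))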
16463 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
16464 // We perform this optimization post type-legalization because
16465 // the type-legalizer often scalarizes integer-promoted vectors.
16466 // Performing this optimization earlier may create bit-casts which
16467 // will be type-legalized to complex code sequences.
16468 // We perform this optimization only before the operation legalizer because we
16469 // may introduce illegal operations.
16470 if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
16471 return SDValue();
16473 unsigned NumInScalars = N->getNumOperands();
16474 SDLoc DL(N);
16475 EVT VT = N->getValueType(0);
16477 // Check to see if this is a BUILD_VECTOR of a bunch of values
16478 // which come from any_extend or zero_extend nodes. If so, we can create
16479 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
16480 // optimizations. We do not handle sign-extend because we can't fill the sign
16481 // using shuffles.
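// As an illustrative sketch (hypothetical values, not from the original
// source), on a little-endian target:
//   (v2i32 build_vector (i32 zero_extend a:i16), (i32 zero_extend b:i16))
// becomes
//   (v2i32 bitcast (v4i16 build_vector a, 0, b, 0))
// where the zero filler is used because not all extends were ANY_EXTENDs.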
16482 EVT SourceType = MVT::Other;
16483 bool AllAnyExt = true;
16485 for (unsigned i = 0; i != NumInScalars; ++i) {
16486 SDValue In = N->getOperand(i);
16487 // Ignore undef inputs.
16488 if (In.isUndef()) continue;
16490 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
16491 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
16493 // Abort if the element is not an extension.
16494 if (!ZeroExt && !AnyExt) {
16495 SourceType = MVT::Other;
16496 break;
16497 }
16499 // The input is a ZeroExt or AnyExt. Check the original type.
16500 EVT InTy = In.getOperand(0).getValueType();
16502 // Check that all of the widened source types are the same.
16503 if (SourceType == MVT::Other)
16504 // First time.
16505 SourceType = InTy;
16506 else if (InTy != SourceType) {
16507 // Multiple incoming types. Abort.
16508 SourceType = MVT::Other;
16509 break;
16510 }
16512 // Check if all of the extends are ANY_EXTENDs.
16513 AllAnyExt &= AnyExt;
16514 }
16516 // In order to have valid types, all of the inputs must be extended from the
16517 // same source type and all of the inputs must be any or zero extend.
16518 // Scalar sizes must be a power of two.
16519 EVT OutScalarTy = VT.getScalarType();
16520 bool ValidTypes = SourceType != MVT::Other &&
16521 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
16522 isPowerOf2_32(SourceType.getSizeInBits());
16524 // Create a new simpler BUILD_VECTOR sequence which other optimizations can
16525 // turn into a single shuffle instruction.
16526 if (!ValidTypes)
16527 return SDValue();
16529 bool isLE = DAG.getDataLayout().isLittleEndian();
16530 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
16531 assert(ElemRatio > 1 && "Invalid element size ratio");
16532 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
16533 DAG.getConstant(0, DL, SourceType);
16535 unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
16536 SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
16538 // Populate the new build_vector
16539 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
16540 SDValue Cast = N->getOperand(i);
16541 assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
16542 Cast.getOpcode() == ISD::ZERO_EXTEND ||
16543 Cast.isUndef()) && "Invalid cast opcode");
16544 SDValue In;
16545 if (Cast.isUndef())
16546 In = DAG.getUNDEF(SourceType);
16547 else
16548 In = Cast->getOperand(0);
16549 unsigned Index = isLE ? (i * ElemRatio) :
16550 (i * ElemRatio + (ElemRatio - 1));
16552 assert(Index < Ops.size() && "Invalid index");
16553 Ops[Index] = In;
16554 }
16556 // The type of the new BUILD_VECTOR node.
16557 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
16558 assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
16559 "Invalid vector size");
16560 // Check if the new vector type is legal.
16561 if (!isTypeLegal(VecVT) ||
16562 (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
16563 TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
16564 return SDValue();
16566 // Make the new BUILD_VECTOR.
16567 SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
16569 // The new BUILD_VECTOR node has the potential to be further optimized.
16570 AddToWorklist(BV.getNode());
16571 // Bitcast to the desired type.
16572 return DAG.getBitcast(VT, BV);
16573 }
16575 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
16576 ArrayRef<int> VectorMask,
16577 SDValue VecIn1, SDValue VecIn2,
16578 unsigned LeftIdx, bool DidSplitVec) {
16579 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16580 SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
16582 EVT VT = N->getValueType(0);
16583 EVT InVT1 = VecIn1.getValueType();
16584 EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
16586 unsigned NumElems = VT.getVectorNumElements();
16587 unsigned ShuffleNumElems = NumElems;
16589 // If we artificially split a vector in two already, then the offsets in the
16590 // operands will all be based off of VecIn1, even those in VecIn2.
16591 unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
16593 // We can't generate a shuffle node with mismatched input and output types.
16594 // Try to make the types match the type of the output.
16595 if (InVT1 != VT || InVT2 != VT) {
16596 if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
16597 // If the output vector length is a multiple of both input lengths,
16598 // we can concatenate them and pad the rest with undefs.
16599 unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
16600 assert(NumConcats >= 2 && "Concat needs at least two inputs!");
16601 SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
16602 ConcatOps[0] = VecIn1;
16603 ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
16604 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16605 VecIn2 = SDValue();
16606 } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
16607 if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
16608 return SDValue();
16610 if (!VecIn2.getNode()) {
16611 // If we only have one input vector, and it's twice the size of the
16612 // output, split it in two.
16613 VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
16614 DAG.getConstant(NumElems, DL, IdxTy));
16615 VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
16616 // Since we now have shorter input vectors, adjust the offset of the
16617 // second vector's start.
16618 Vec2Offset = NumElems;
16619 } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
16620 // VecIn1 is wider than the output, and we have another, possibly
16621 // smaller input. Pad the smaller input with undefs, shuffle at the
16622 // input vector width, and extract the output.
16623 // The shuffle type is different than VT, so check legality again.
16624 if (LegalOperations &&
16625 !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
16626 return SDValue();
16628 // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
16629 // lower it back into a BUILD_VECTOR. So if the inserted type is
16630 // illegal, don't even try.
16631 if (InVT1 != InVT2) {
16632 if (!TLI.isTypeLegal(InVT2))
16633 return SDValue();
16634 VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
16635 DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
16636 }
16637 ShuffleNumElems = NumElems * 2;
16638 } else {
16639 // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
16640 // than VecIn1. We can't handle this for now - this case will disappear
16641 // when we start sorting the vectors by type.
16642 return SDValue();
16643 }
16644 } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
16645 InVT1.getSizeInBits() == VT.getSizeInBits()) {
16646 SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
16647 ConcatOps[0] = VecIn2;
16648 VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
16649 } else {
16650 // TODO: Support cases where the length mismatch isn't exactly by a
16651 // factor of 2.
16652 // TODO: Move this check upwards, so that if we have bad type
16653 // mismatches, we don't create any DAG nodes.
16654 return SDValue();
16655 }
16656 }
16658 // Initialize mask to undef.
16659 SmallVector<int, 8> Mask(ShuffleNumElems, -1);
16661 // Only need to run up to the number of elements actually used, not the
16662 // total number of elements in the shuffle - if we are shuffling a wider
16663 // vector, the high lanes should be set to undef.
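// Illustrative sketch (hypothetical types, not from the original source):
// with VT = v4i32 and InVT1 = InVT2 = v8i32, ShuffleNumElems is 8, so only
// the first 4 mask entries are filled in below; lanes 4-7 stay undef and the
// final v4i32 result is extracted from lane 0 of the wide shuffle afterwards.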
16664 for (unsigned i = 0; i != NumElems; ++i) {
16665 if (VectorMask[i] <= 0)
16666 continue;
16668 unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
16669 if (VectorMask[i] == (int)LeftIdx) {
16670 Mask[i] = ExtIndex;
16671 } else if (VectorMask[i] == (int)LeftIdx + 1) {
16672 Mask[i] = Vec2Offset + ExtIndex;
16673 }
16674 }
16676 // The types of the input vectors may have changed above.
16677 InVT1 = VecIn1.getValueType();
16679 // If we already have a VecIn2, it should have the same type as VecIn1.
16680 // If we don't, get an undef/zero vector of the appropriate type.
16681 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
16682 assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
16684 SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
16685 if (ShuffleNumElems > NumElems)
16686 Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
16688 return Shuffle;
16689 }
16691 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
16692 assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
16694 // First, determine where the build vector is not undef.
16695 // TODO: We could extend this to handle zero elements as well as undefs.
16696 int NumBVOps = BV->getNumOperands();
16697 int ZextElt = -1;
16698 for (int i = 0; i != NumBVOps; ++i) {
16699 SDValue Op = BV->getOperand(i);
16700 if (Op.isUndef())
16701 continue;
16702 if (ZextElt == -1)
16703 ZextElt = i;
16704 else
16705 return SDValue();
16706 }
16707 // Bail out if there's no non-undef element.
16708 if (ZextElt == -1)
16709 return SDValue();
16711 // The build vector contains some number of undef elements and exactly
16712 // one other element. That other element must be a zero-extended scalar
16713 // extracted from a vector at a constant index to turn this into a shuffle.
16714 // Also, require that the build vector does not implicitly truncate/extend
16715 // its elements.
16716 // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
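// For example (an illustrative sketch, not from the original source), given
//   (v4i32 build_vector undef, (i32 zero_extend (i16 extractelt V:v8i16, 2)),
//                       undef, undef)
// ZextRatio is 2 and we build the v8i16 mask <u,u,2,8,u,u,u,u>: lane 2 of V
// supplies the low half and element 0 of the zero vector the high half.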
16717 EVT VT = BV->getValueType(0);
16718 SDValue Zext = BV->getOperand(ZextElt);
16719 if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
16720 Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16721 !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
16722 Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
16723 return SDValue();
16725 // The destination size of the zero-extend must be a multiple of the source
16726 // size, and we must build a vector of the same size as the source of the extract element.
16727 SDValue Extract = Zext.getOperand(0);
16728 unsigned DestSize = Zext.getValueSizeInBits();
16729 unsigned SrcSize = Extract.getValueSizeInBits();
16730 if (DestSize % SrcSize != 0 ||
16731 Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
16732 return SDValue();
16734 // Create a shuffle mask that will combine the extracted element with zeros
16735 // and undefs.
16736 int ZextRatio = DestSize / SrcSize;
16737 int NumMaskElts = NumBVOps * ZextRatio;
16738 SmallVector<int, 32> ShufMask(NumMaskElts, -1);
16739 for (int i = 0; i != NumMaskElts; ++i) {
16740 if (i / ZextRatio == ZextElt) {
16741 // The low bits of the (potentially translated) extracted element map to
16742 // the source vector. The high bits map to zero. We will use a zero vector
16743 // as the 2nd source operand of the shuffle, so use the 1st element of
16744 // that vector (mask value is number-of-elements) for the high bits.
16745 if (i % ZextRatio == 0)
16746 ShufMask[i] = Extract.getConstantOperandVal(1);
16747 else
16748 ShufMask[i] = NumMaskElts;
16749 }
16751 // Undef elements of the build vector remain undef because we initialize
16752 // the shuffle mask with -1.
16753 }
16755 // Turn this into a shuffle with zero if that's legal.
16756 EVT VecVT = Extract.getOperand(0).getValueType();
16757 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
16758 return SDValue();
16760 // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
16761 // bitcast (shuffle V, ZeroVec, VectorMask)
16762 SDLoc DL(BV);
16763 SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
16764 SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
16765 ShufMask);
16766 return DAG.getBitcast(VT, Shuf);
16767 }
16769 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
16770 // operations. If the types of the vectors we're extracting from allow it,
16771 // turn this into a vector_shuffle node.
16772 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
16773 SDLoc DL(N);
16774 EVT VT = N->getValueType(0);
16776 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
16777 if (!isTypeLegal(VT))
16778 return SDValue();
16780 if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
16781 return V;
16783 // May only combine to shuffle after legalize if shuffle is legal.
16784 if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
16785 return SDValue();
16787 bool UsesZeroVector = false;
16788 unsigned NumElems = N->getNumOperands();
16790 // Record, for each element of the newly built vector, which input vector
16791 // that element comes from. -1 stands for undef, 0 for the zero vector,
16792 // and positive values for the input vectors.
16793 // VectorMask maps each element to its vector number, and VecIn maps vector
16794 // numbers to their initial SDValues.
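// E.g. (an illustrative sketch, hypothetical operands): for
//   build_vector (extractelt A, 0), zero, undef, (extractelt B, 1)
// we end up with VectorMask = [1, 0, -1, 2] and VecIn = [<placeholder>, A, B].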
16796 SmallVector<int, 8> VectorMask(NumElems, -1);
16797 SmallVector<SDValue, 8> VecIn;
16798 VecIn.push_back(SDValue());
16800 for (unsigned i = 0; i != NumElems; ++i) {
16801 SDValue Op = N->getOperand(i);
16803 if (Op.isUndef())
16804 continue;
16806 // See if we can use a blend with a zero vector.
16807 // TODO: Should we generalize this to a blend with an arbitrary constant
16808 // vector?
16809 if (isNullConstant(Op) || isNullFPConstant(Op)) {
16810 UsesZeroVector = true;
16811 VectorMask[i] = 0;
16812 continue;
16813 }
16815 // Not an undef or zero. If the input is something other than an
16816 // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
16817 if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
16818 !isa<ConstantSDNode>(Op.getOperand(1)))
16819 return SDValue();
16820 SDValue ExtractedFromVec = Op.getOperand(0);
16822 const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
16823 if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
16824 return SDValue();
16826 // All inputs must have the same element type as the output.
16827 if (VT.getVectorElementType() !=
16828 ExtractedFromVec.getValueType().getVectorElementType())
16829 return SDValue();
16831 // Have we seen this input vector before?
16832 // The vectors are expected to be tiny (usually 1 or 2 elements), so using
16833 // a map back from SDValues to numbers isn't worth it.
16834 unsigned Idx = std::distance(
16835 VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
16836 if (Idx == VecIn.size())
16837 VecIn.push_back(ExtractedFromVec);
16839 VectorMask[i] = Idx;
16840 }
16842 // If we didn't find at least one input vector, bail out.
16843 if (VecIn.size() < 2)
16844 return SDValue();
16846 // If all the operands of the BUILD_VECTOR extract from the same
16847 // vector, then split that vector efficiently based on the maximum
16848 // vector access index and adjust the VectorMask and
16849 // VecIn accordingly.
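// Illustrative sketch (hypothetical sizes): a v4i32 build_vector whose
// operands extract lanes 0, 3, 9 and 13 of one v16i32 source has
// MaxIndex = 13 and NearestPow2 = 16, so the source is split into two v8i32
// halves and the mask entries are renumbered to point at VecIn1/VecIn2.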
16850 bool DidSplitVec = false;
16851 if (VecIn.size() == 2) {
16852 unsigned MaxIndex = 0;
16853 unsigned NearestPow2 = 0;
16854 SDValue Vec = VecIn.back();
16855 EVT InVT = Vec.getValueType();
16856 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16857 SmallVector<unsigned, 8> IndexVec(NumElems, 0);
16859 for (unsigned i = 0; i < NumElems; i++) {
16860 if (VectorMask[i] <= 0)
16861 continue;
16862 unsigned Index = N->getOperand(i).getConstantOperandVal(1);
16863 IndexVec[i] = Index;
16864 MaxIndex = std::max(MaxIndex, Index);
16865 }
16867 NearestPow2 = PowerOf2Ceil(MaxIndex);
16868 if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
16869 NumElems * 2 < NearestPow2) {
16870 unsigned SplitSize = NearestPow2 / 2;
16871 EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
16872 InVT.getVectorElementType(), SplitSize);
16873 if (TLI.isTypeLegal(SplitVT)) {
16874 SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16875 DAG.getConstant(SplitSize, DL, IdxTy));
16876 SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
16877 DAG.getConstant(0, DL, IdxTy));
16878 VecIn.pop_back();
16879 VecIn.push_back(VecIn1);
16880 VecIn.push_back(VecIn2);
16881 DidSplitVec = true;
16883 for (unsigned i = 0; i < NumElems; i++) {
16884 if (VectorMask[i] <= 0)
16885 continue;
16886 VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
16887 }
16888 }
16889 }
16890 }
16892 // TODO: We want to sort the vectors by descending length, so that adjacent
16893 // pairs have similar length, and the longer vector is always first in the
16894 // pair.
16896 // TODO: Should this fire if some of the input vectors have illegal types (like
16897 // it does now), or should we let legalization run its course first?
16899 // Shuffle phase:
16900 // Take pairs of vectors, and shuffle them so that the result has elements
16901 // from these vectors in the correct places.
16902 // For example, given:
16903 // t10: i32 = extract_vector_elt t1, Constant:i64<0>
16904 // t11: i32 = extract_vector_elt t2, Constant:i64<0>
16905 // t12: i32 = extract_vector_elt t3, Constant:i64<0>
16906 // t13: i32 = extract_vector_elt t1, Constant:i64<1>
16907 // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
16908 // We will generate:
16909 // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
16910 // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
16911 SmallVector<SDValue, 4> Shuffles;
16912 for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
16913 unsigned LeftIdx = 2 * In + 1;
16914 SDValue VecLeft = VecIn[LeftIdx];
16915 SDValue VecRight =
16916 (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
16918 if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
16919 VecRight, LeftIdx, DidSplitVec))
16920 Shuffles.push_back(Shuffle);
16921 else
16922 return SDValue();
16923 }
16925 // If we need the zero vector as an "ingredient" in the blend tree, add it
16926 // to the list of shuffles.
16927 if (UsesZeroVector)
16928 Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
16929 : DAG.getConstantFP(0.0, DL, VT));
16931 // If we only have one shuffle, we're done.
16932 if (Shuffles.size() == 1)
16933 return Shuffles[0];
16935 // Update the vector mask to point to the post-shuffle vectors.
16936 for (int &Vec : VectorMask)
16937 if (Vec == 0)
16938 Vec = Shuffles.size() - 1;
16939 else
16940 Vec = (Vec - 1) / 2;
16942 // More than one shuffle. Generate a binary tree of blends, e.g. if from
16943 // the previous step we got the set of shuffles t10, t11, t12, t13, we will
16944 // generate:
16945 // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
16946 // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
16947 // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
16948 // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
16949 // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
16950 // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
16951 // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
16953 // Make sure the initial size of the shuffle list is even.
16954 if (Shuffles.size() % 2)
16955 Shuffles.push_back(DAG.getUNDEF(VT));
16957 for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
16958 if (CurSize % 2) {
16959 Shuffles[CurSize] = DAG.getUNDEF(VT);
16960 CurSize++;
16961 }
16962 for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
16963 int Left = 2 * In;
16964 int Right = 2 * In + 1;
16965 SmallVector<int, 8> Mask(NumElems, -1);
16966 for (unsigned i = 0; i != NumElems; ++i) {
16967 if (VectorMask[i] == Left) {
16968 Mask[i] = i;
16969 VectorMask[i] = In;
16970 } else if (VectorMask[i] == Right) {
16971 Mask[i] = i + NumElems;
16972 VectorMask[i] = In;
16973 }
16974 }
16976 Shuffles[In] =
16977 DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
16978 }
16979 }
16980 return Shuffles[0];
16981 }
16983 // Try to turn a build vector of zero extends of extract vector elts into a
16984 // vector zero extend and possibly an extract subvector.
16985 // TODO: Support sign extend?
16986 // TODO: Allow undef elements?
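// For example (an illustrative sketch, not from the original source):
//   (v4i32 build_vector (i32 zext (i16 extractelt V, 4)), ...,
//                       (i32 zext (i16 extractelt V, 7)))
//   --> (v4i32 zero_extend (v4i16 extract_subvector V, 4))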
16987 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
16988 if (LegalOperations)
16989 return SDValue();
16991 EVT VT = N->getValueType(0);
16993 bool FoundZeroExtend = false;
16994 SDValue Op0 = N->getOperand(0);
16995 auto checkElem = [&](SDValue Op) -> int64_t {
16996 unsigned Opc = Op.getOpcode();
16997 FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
16998 if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
16999 Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17000 Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17001 if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17002 return C->getZExtValue();
17003 return -1;
17004 };
17006 // Make sure the first element matches
17007 // (zext (extract_vector_elt X, C))
17008 int64_t Offset = checkElem(Op0);
17009 if (Offset < 0)
17010 return SDValue();
17012 unsigned NumElems = N->getNumOperands();
17013 SDValue In = Op0.getOperand(0).getOperand(0);
17014 EVT InSVT = In.getValueType().getScalarType();
17015 EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17017 // Don't create an illegal input type after type legalization.
17018 if (LegalTypes && !TLI.isTypeLegal(InVT))
17019 return SDValue();
17021 // Ensure all the elements come from the same vector and are adjacent.
17022 for (unsigned i = 1; i != NumElems; ++i) {
17023 if ((Offset + i) != checkElem(N->getOperand(i)))
17024 return SDValue();
17025 }
17027 SDLoc DL(N);
17028 In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17029 Op0.getOperand(0).getOperand(1));
17030 return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17031 VT, In);
17032 }
17034 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17035 EVT VT = N->getValueType(0);
17037 // A vector built entirely of undefs is undef.
17038 if (ISD::allOperandsUndef(N))
17039 return DAG.getUNDEF(VT);
17041 // If this is a splat of a bitcast from another vector, change to a
17042 // concat_vectors.
17043 // For example:
17044 // (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17045 // (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17047 // If X is a build_vector itself, the concat can become a larger build_vector.
17048 // TODO: Maybe this is useful for non-splat too?
17049 if (!LegalOperations) {
17050 if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17051 Splat = peekThroughBitcasts(Splat);
17052 EVT SrcVT = Splat.getValueType();
17053 if (SrcVT.isVector()) {
17054 unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17055 EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17056 SrcVT.getVectorElementType(), NumElts);
17057 if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17058 SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17059 SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17060 NewVT, Ops);
17061 return DAG.getBitcast(VT, Concat);
17062 }
17063 }
17064 }
17065 }
17067 // Check if we can express BUILD_VECTOR via subvector extract.
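// E.g. (an illustrative sketch, hypothetical types):
//   (v2i64 build_vector (extractelt V:v4i64, 2), (extractelt V, 3))
//   --> (v2i64 extract_subvector V, 2)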
17068 if (!LegalTypes && (N->getNumOperands() > 1)) {
17069 SDValue Op0 = N->getOperand(0);
17070 auto checkElem = [&](SDValue Op) -> uint64_t {
17071 if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17072 (Op0.getOperand(0) == Op.getOperand(0)))
17073 if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17074 return CNode->getZExtValue();
17075 return -1;
17076 };
17078 int Offset = checkElem(Op0);
17079 for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17080 if (Offset + i != checkElem(N->getOperand(i))) {
17081 Offset = -1;
17082 break;
17083 }
17084 }
17086 if ((Offset == 0) &&
17087 (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17088 return Op0.getOperand(0);
17089 if ((Offset != -1) &&
17090 ((Offset % N->getValueType(0).getVectorNumElements()) ==
17091 0)) // IDX must be multiple of output size.
17092 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17093 Op0.getOperand(0), Op0.getOperand(1));
17094 }
17096 if (SDValue V = convertBuildVecZextToZext(N))
17097 return V;
17099 if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17100 return V;
17102 if (SDValue V = reduceBuildVecToShuffle(N))
17103 return V;
17105 return SDValue();
17106 }
17108 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17109 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17110 EVT OpVT = N->getOperand(0).getValueType();
17112 // If the operands are legal vectors, leave them alone.
17113 if (TLI.isTypeLegal(OpVT))
17114 return SDValue();
17116 SDLoc DL(N);
17117 EVT VT = N->getValueType(0);
17118 SmallVector<SDValue, 8> Ops;
17120 EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17121 SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17123 // Keep track of what we encounter.
17124 bool AnyInteger = false;
17125 bool AnyFP = false;
17126 for (const SDValue &Op : N->ops()) {
17127 if (ISD::BITCAST == Op.getOpcode() &&
17128 !Op.getOperand(0).getValueType().isVector())
17129 Ops.push_back(Op.getOperand(0));
17130 else if (ISD::UNDEF == Op.getOpcode())
17131 Ops.push_back(ScalarUndef);
17132 else
17133 return SDValue();
17135 // Note whether we encounter an integer or floating point scalar.
17136 // If it's neither, bail out, it could be something weird like x86mmx.
17137 EVT LastOpVT = Ops.back().getValueType();
17138 if (LastOpVT.isFloatingPoint())
17139 AnyFP = true;
17140 else if (LastOpVT.isInteger())
17141 AnyInteger = true;
17142 else
17143 return SDValue();
17144 }
17146 // If any of the operands is a floating point scalar bitcast to a vector,
17147 // use floating point types throughout, and bitcast everything.
17148 // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17149 if (AnyFP) {
17150 SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17151 ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17152 if (AnyInteger) {
17153 for (SDValue &Op : Ops) {
17154 if (Op.getValueType() == SVT)
17155 continue;
17156 if (Op.isUndef())
17157 Op = ScalarUndef;
17158 else
17159 Op = DAG.getBitcast(SVT, Op);
17160 }
17161 }
17162 }
17164 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17165 VT.getSizeInBits() / SVT.getSizeInBits());
17166 return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17167 }
17169 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17170 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17171 // most two distinct vectors the same size as the result, attempt to turn this
17172 // into a legal shuffle.
17173 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17174 EVT VT = N->getValueType(0);
17175 EVT OpVT = N->getOperand(0).getValueType();
17176 int NumElts = VT.getVectorNumElements();
17177 int NumOpElts = OpVT.getVectorNumElements();
17179 SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17180 SmallVector<int, 8> Mask;
17182 for (SDValue Op : N->ops()) {
17183 Op = peekThroughBitcasts(Op);
17185 // UNDEF nodes convert to UNDEF shuffle mask values.
17186 if (Op.isUndef()) {
17187 Mask.append((unsigned)NumOpElts, -1);
17188 continue;
17189 }
17191 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17192 return SDValue();
17194 // What vector are we extracting the subvector from and at what index?
17195 SDValue ExtVec = Op.getOperand(0);
17197 // We want the EVT of the original extraction to correctly scale the
17198 // extraction index.
17199 EVT ExtVT = ExtVec.getValueType();
17200 ExtVec = peekThroughBitcasts(ExtVec);
17202 // UNDEF nodes convert to UNDEF shuffle mask values.
17203 if (ExtVec.isUndef()) {
17204 Mask.append((unsigned)NumOpElts, -1);
17205 continue;
17206 }
17208 if (!isa<ConstantSDNode>(Op.getOperand(1)))
17209 return SDValue();
17210 int ExtIdx = Op.getConstantOperandVal(1);
17212 // Ensure that we are extracting a subvector from a vector the same
17213 // size as the result.
17214 if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17215 return SDValue();
17217 // Scale the subvector index to account for any bitcast.
17218 int NumExtElts = ExtVT.getVectorNumElements();
17219 if (0 == (NumExtElts % NumElts))
17220 ExtIdx /= (NumExtElts / NumElts);
17221 else if (0 == (NumElts % NumExtElts))
17222 ExtIdx *= (NumElts / NumExtElts);
17223 else
17224 return SDValue();
17226 // At most we can reference 2 inputs in the final shuffle.
17227 if (SV0.isUndef() || SV0 == ExtVec) {
17228 SV0 = ExtVec;
17229 for (int i = 0; i != NumOpElts; ++i)
17230 Mask.push_back(i + ExtIdx);
17231 } else if (SV1.isUndef() || SV1 == ExtVec) {
17232 SV1 = ExtVec;
17233 for (int i = 0; i != NumOpElts; ++i)
17234 Mask.push_back(i + ExtIdx + NumElts);
17235 } else {
17236 return SDValue();
17237 }
17238 }
17240 if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
17241 return SDValue();
17243 return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17244 DAG.getBitcast(VT, SV1), Mask);
17245 }
17247 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17248 // If we only have one input vector, we don't need to do any concatenation.
17249 if (N->getNumOperands() == 1)
17250 return N->getOperand(0);
17252 // Check if all of the operands are undefs.
17253 EVT VT = N->getValueType(0);
17254 if (ISD::allOperandsUndef(N))
17255 return DAG.getUNDEF(VT);
17257 // Optimize concat_vectors where all but the first of the vectors are undef.
17258 if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17259 return Op.isUndef();
17260 })) {
17261 SDValue In = N->getOperand(0);
17262 assert(In.getValueType().isVector() && "Must concat vectors");
17264 SDValue Scalar = peekThroughOneUseBitcasts(In);
17266 // concat_vectors(scalar_to_vector(scalar), undef) ->
17267 // scalar_to_vector(scalar)
17268 if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17269 Scalar.hasOneUse()) {
17270 EVT SVT = Scalar.getValueType().getVectorElementType();
17271 if (SVT == Scalar.getOperand(0).getValueType())
17272 Scalar = Scalar.getOperand(0);
17273 }
17275 // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17276 if (!Scalar.getValueType().isVector()) {
17277 // If the bitcast type isn't legal, it might be a trunc of a legal type;
17278 // look through the trunc so we can still do the transform:
17279 // concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17280 if (Scalar->getOpcode() == ISD::TRUNCATE &&
17281 !TLI.isTypeLegal(Scalar.getValueType()) &&
17282 TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17283 Scalar = Scalar->getOperand(0);
17285 EVT SclTy = Scalar.getValueType();
17287 if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17288 return SDValue();
17290 // Bail out if the vector size is not a multiple of the scalar size.
17291 if (VT.getSizeInBits() % SclTy.getSizeInBits())
17292 return SDValue();
17294 unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17295 if (VNTNumElms < 2)
17296 return SDValue();
17298 EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17299 if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17300 return SDValue();
17302 SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17303 return DAG.getBitcast(VT, Res);
17304 }
17305 }
17307 // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17308 // We have already tested above for an UNDEF only concatenation.
17309 // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17310 // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17311 auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17312 return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17313 };
17314 if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17315 SmallVector<SDValue, 8> Opnds;
17316 EVT SVT = VT.getScalarType();
17318 EVT MinVT = SVT;
17319 if (!SVT.isFloatingPoint()) {
17320 // If the BUILD_VECTORs are built from integers, they may have different
17321 // operand types. Get the smallest type and truncate all operands to it.
17322 bool FoundMinVT = false;
17323 for (const SDValue &Op : N->ops())
17324 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17325 EVT OpSVT = Op.getOperand(0).getValueType();
17326 MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17327 FoundMinVT = true;
17328 }
17329 assert(FoundMinVT && "Concat vector type mismatch");
17330 }
17332 for (const SDValue &Op : N->ops()) {
17333 EVT OpVT = Op.getValueType();
17334 unsigned NumElts = OpVT.getVectorNumElements();
17336 if (ISD::UNDEF == Op.getOpcode())
17337 Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17339 if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17340 if (SVT.isFloatingPoint()) {
17341 assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17342 Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17343 } else {
17344 for (unsigned i = 0; i != NumElts; ++i)
17345 Opnds.push_back(
17346 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17347 }
17348 }
17349 }
17351 assert(VT.getVectorNumElements() == Opnds.size() &&
17352 "Concat vector type mismatch");
17353 return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17354 }
17356 // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17357 if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17358 return V;
17360 // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17361 if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17362 if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17363 return V;
17365 // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17366 // nodes often generate nop CONCAT_VECTOR nodes.
17367 // Scan the CONCAT_VECTOR operands and look for CONCAT operations that
17368 // place the incoming vectors at the exact same location.
17369 SDValue SingleSource = SDValue();
17370 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17372 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17373 SDValue Op = N->getOperand(i);
17375 if (Op.isUndef())
17376 continue;
17378 // Check if this is the identity extract:
17379 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17380 return SDValue();
17382 // Find the single incoming vector for the extract_subvector.
17383 if (SingleSource.getNode()) {
17384 if (Op.getOperand(0) != SingleSource)
17385 return SDValue();
17386 } else {
17387 SingleSource = Op.getOperand(0);
17389 // Check the source type is the same as the type of the result.
17390 // If not, this concat may extend the vector, so we cannot
17391 // optimize it away.
17392 if (SingleSource.getValueType() != N->getValueType(0))
17393 return SDValue();
17394 }
17396 unsigned IdentityIndex = i * PartNumElem;
17397 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17398 // The extract index must be constant.
17399 if (!CS)
17400 return SDValue();
17402 // Check that we are reading from the identity index.
17403 if (CS->getZExtValue() != IdentityIndex)
17404 return SDValue();
17405 }
17407 if (SingleSource.getNode())
17408 return SingleSource;
17410 return SDValue();
17411 }
17413 /// If we are extracting a subvector produced by a wide binary operator try
17414 /// to use a narrow binary operator and/or avoid concatenation and extraction.
17415 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
17416 // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
17417 // some of these bailouts with other transforms.
17419 // The extract index must be a constant, so we can map it to a concat operand.
17420 auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17421 if (!ExtractIndexC)
17422 return SDValue();
17424 // We are looking for an optionally bitcasted wide vector binary operator
17425 // feeding an extract subvector.
17426 SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
17427 if (!ISD::isBinaryOp(BinOp.getNode()))
17428 return SDValue();
17430 // The binop must be a vector type, so we can extract some fraction of it.
17431 EVT WideBVT = BinOp.getValueType();
17432 if (!WideBVT.isVector())
17433 return SDValue();
17435 EVT VT = Extract->getValueType(0);
17436 unsigned ExtractIndex = ExtractIndexC->getZExtValue();
17437 assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
17438 "Extract index is not a multiple of the vector length.");
17440 // Bail out if this is not a proper multiple width extraction.
17441 unsigned WideWidth = WideBVT.getSizeInBits();
17442 unsigned NarrowWidth = VT.getSizeInBits();
17443 if (WideWidth % NarrowWidth != 0)
17444 return SDValue();
17446 // Bail out if we are extracting a fraction of a single operation. This can
17447 // occur because we potentially looked through a bitcast of the binop.
17448 unsigned NarrowingRatio = WideWidth / NarrowWidth;
17449 unsigned WideNumElts = WideBVT.getVectorNumElements();
17450 if (WideNumElts % NarrowingRatio != 0)
17451 return SDValue();
17453 // Bail out if the target does not support a narrower version of the binop.
17454 EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
17455 WideNumElts / NarrowingRatio);
17456 unsigned BOpcode = BinOp.getOpcode();
17457 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17458 if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
17459 return SDValue();
17461 // If extraction is cheap, we don't need to look at the binop operands
17462 // for concat ops. The narrow binop alone makes this transform profitable.
17463 // We can't just reuse the original extract index operand because we may have
17464 // bitcasted.
17465 unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
17466 unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
17467 EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
17468 if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
17469 BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
17470 // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
17471 SDLoc DL(Extract);
17472 SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17473 SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17474 BinOp.getOperand(0), NewExtIndex);
17475 SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17476 BinOp.getOperand(1), NewExtIndex);
17477 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
17478 BinOp.getNode()->getFlags());
17479 return DAG.getBitcast(VT, NarrowBinOp);
17480 }
17482 // Only handle the case where we are doubling and then halving. A larger ratio
17483 // may require more than two narrow binops to replace the wide binop.
17484 if (NarrowingRatio != 2)
17485 return SDValue();
17487 // TODO: The motivating case for this transform is an x86 AVX1 target. That
17488 // target has temptingly almost legal versions of bitwise logic ops in 256-bit
17489 // flavors, but no other 256-bit integer support. This could be extended to
17490 // handle any binop, but that may require fixing/adding other folds to avoid
17491 // codegen regressions.
17492 if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
17493 return SDValue();
17495 // We need at least one concatenation operation of a binop operand to make
17496 // this transform worthwhile. The concat must double the input vector sizes.
17497 // TODO: Should we also handle INSERT_SUBVECTOR patterns?
17498 SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
17499 SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
17500 bool ConcatL =
17501 LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
17502 bool ConcatR =
17503 RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
17504 if (ConcatL || ConcatR) {
17505 // If a binop operand was not the result of a concat, we must extract a
17506 // half-sized operand for our new narrow binop:
17507 // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
17508 // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
17509 // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
17510 SDLoc DL(Extract);
17511 SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
17512 SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
17513 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17514 BinOp.getOperand(0), IndexC);
17516 SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
17517 : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
17518 BinOp.getOperand(1), IndexC);
17520 SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
17521 return DAG.getBitcast(VT, NarrowBinOp);
17522 }
17524 return SDValue();
17525 }
17527 /// If we are extracting a subvector from a wide vector load, convert to a
17528 /// narrow load to eliminate the extraction:
17529 /// (extract_subvector (load wide vector)) --> (load narrow vector)
17530 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
17531 // TODO: Add support for big-endian. The offset calculation must be adjusted.
17532 if (DAG.getDataLayout().isBigEndian())
17533 return SDValue();
17535 auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
17536 auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
17537 if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
17538 return SDValue();
17540 // Allow targets to opt-out.
17541 EVT VT = Extract->getValueType(0);
17542 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17543 if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
17544 return SDValue();
17546 // The narrow load will be offset from the base address of the old load if
17547 // we are extracting from something besides index 0 (little-endian).
17548 SDLoc DL(Extract);
17549 SDValue BaseAddr = Ld->getOperand(1);
17550 unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
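// E.g. (an illustrative sketch): extracting a v2f32 at index 2 from a v8f32
// load gives Offset = 2 * 4 = 8 bytes, so we load a v2f32 from $addr + 8.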
17552 // TODO: Use "BaseIndexOffset" to make this more effective.
17553 SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
17554 MachineFunction &MF = DAG.getMachineFunction();
17555 MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
17556 VT.getStoreSize());
17557 SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
17558 DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
17559 return NewLd;
17560 }
17562 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
17563 EVT NVT = N->getValueType(0);
17564 SDValue V = N->getOperand(0);
17566 // Extract from UNDEF is UNDEF.
17567 if (V.isUndef())
17568 return DAG.getUNDEF(NVT);
17570 if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
17571 if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
17572 return NarrowLoad;
17574 // Combine an extract of an extract into a single extract_subvector.
17575 // ext (ext X, C), 0 --> ext X, C
17576 if (isNullConstant(N->getOperand(1)) &&
17577 V.getOpcode() == ISD::EXTRACT_SUBVECTOR && V.hasOneUse() &&
17578 isa<ConstantSDNode>(V.getOperand(1))) {
17579 if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
17580 V.getConstantOperandVal(1)) &&
17581 TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
17582 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
17583 V.getOperand(1));
17584 }
17585 }
17587 // Combine:
17588 // (extract_subvec (concat V1, V2, ...), i)
17589 // Into:
17590 // Vi if possible
17591 // Only operand 0 is checked as 'concat' assumes all inputs of the same
17592 // type.
17593 if (V.getOpcode() == ISD::CONCAT_VECTORS &&
17594 isa<ConstantSDNode>(N->getOperand(1)) &&
17595 V.getOperand(0).getValueType() == NVT) {
17596 unsigned Idx = N->getConstantOperandVal(1);
17597 unsigned NumElems = NVT.getVectorNumElements();
17598 assert((Idx % NumElems) == 0 &&
17599 "IDX in concat is not a multiple of the result vector length.");
17600 return V->getOperand(Idx / NumElems);
17601 }
17603 V = peekThroughBitcasts(V);
17605 // If the input is a build vector, try to make a smaller build vector.
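// E.g. (an illustrative sketch): extracting a v2i32 at index 2 from
// (v4i32 build_vector a, b, c, d) yields (v2i32 build_vector c, d).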
17606 if (V.getOpcode() == ISD::BUILD_VECTOR) {
17607 if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
17608 EVT InVT = V.getValueType();
17609 unsigned ExtractSize = NVT.getSizeInBits();
17610 unsigned EltSize = InVT.getScalarSizeInBits();
17611 // Only do this if we won't split any elements.
17612 if (ExtractSize % EltSize == 0) {
17613 unsigned NumElems = ExtractSize / EltSize;
17614 EVT EltVT = InVT.getVectorElementType();
17615 EVT ExtractVT = NumElems == 1 ? EltVT
17616 : EVT::getVectorVT(*DAG.getContext(),
17617 EltVT, NumElems);
17618 if ((Level < AfterLegalizeDAG ||
17619 (NumElems == 1 ||
17620 TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
17621 (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
17622 unsigned IdxVal = Idx->getZExtValue();
17623 IdxVal *= NVT.getScalarSizeInBits();
17624 IdxVal /= EltSize;
17626 if (NumElems == 1) {
17627 SDValue Src = V->getOperand(IdxVal);
17628 if (EltVT != Src.getValueType())
17629 Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Src);
17630 return DAG.getBitcast(NVT, Src);
17631 }
17633 // Extract the pieces from the original build_vector.
17634 SDValue BuildVec = DAG.getBuildVector(
17635 ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
17636 return DAG.getBitcast(NVT, BuildVec);
17637 }
17638 }
17639 }
17640 }
17642 if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
17643 // Handle only the simple case where the vector being inserted and the
17644 // vector being extracted are of the same size.
17645 EVT SmallVT = V.getOperand(1).getValueType();
17646 if (!NVT.bitsEq(SmallVT))
17647 return SDValue();
17649 // Only handle cases where both indexes are constants.
17650 auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
17651 auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
17653 if (InsIdx && ExtIdx) {
17654 // Combine:
17655 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
17656 // Into:
17657 // indices are equal or bit offsets are equal => V1
17658 // otherwise => (extract_subvec V1, ExtIdx)
17659 if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
17660 ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
17661 return DAG.getBitcast(NVT, V.getOperand(1));
17662 return DAG.getNode(
17663 ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
17664 DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
17665 N->getOperand(1));
17666 }
17667 }
17669 if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
17670 return NarrowBOp;
17672 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
17673 return SDValue(N, 0);
17675 return SDValue();
17676 }
17678 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
17679 /// followed by concatenation. Narrow vector ops may have better performance
17680 /// than wide ops, and this can unlock further narrowing of other vector ops.
17681 /// Targets can invert this transform later if it is not profitable.
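/// E.g. (an illustrative sketch, not from the original source): a v4i32
/// shuffle<0,5,1,4> of (concat X:v2i32, undef) and (concat Y:v2i32, undef)
/// becomes (concat (shuffle X, Y, <0,3>), (shuffle X, Y, <1,2>)).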
17682 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
17683 SelectionDAG &DAG) {
17684 SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
17685 if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
17686 N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
17687 !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
17688 return SDValue();
17690 // Split the wide shuffle mask into halves. Any mask element that is accessing
17691 // operand 1 is offset down to account for narrowing of the vectors.
17692 ArrayRef<int> Mask = Shuf->getMask();
17693 EVT VT = Shuf->getValueType(0);
17694 unsigned NumElts = VT.getVectorNumElements();
17695 unsigned HalfNumElts = NumElts / 2;
17696 SmallVector<int, 16> Mask0(HalfNumElts, -1);
17697 SmallVector<int, 16> Mask1(HalfNumElts, -1);
17698 for (unsigned i = 0; i != NumElts; ++i) {
17699 if (Mask[i] == -1)
17700 continue;
17701 int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
17702 if (i < HalfNumElts)
17703 Mask0[i] = M;
17704 else
17705 Mask1[i - HalfNumElts] = M;
17706 }
17708 // Ask the target if this is a valid transform.
17709 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17710 EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
17711 HalfNumElts);
17712 if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
17713 !TLI.isShuffleMaskLegal(Mask1, HalfVT))
17714 return SDValue();
17716 // shuffle (concat X, undef), (concat Y, undef), Mask -->
17717 // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
17718 SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
17719 SDLoc DL(Shuf);
17720 SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
17721 SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
17722 return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
17723 }
17725 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat, or to
17726 // turn a shuffle of a single concat into a simpler shuffle followed by a concat.
17727 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
17728 EVT VT = N->getValueType(0);
17729 unsigned NumElts = VT.getVectorNumElements();
17731 SDValue N0 = N->getOperand(0);
17732 SDValue N1 = N->getOperand(1);
17733 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
17734 ArrayRef<int> Mask = SVN->getMask();
17736 SmallVector<SDValue, 4> Ops;
17737 EVT ConcatVT = N0.getOperand(0).getValueType();
17738 unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
17739 unsigned NumConcats = NumElts / NumElemsPerConcat;
17741 auto IsUndefMaskElt = [](int i) { return i == -1; };
17743 // Special case: shuffle(concat(A,B)) can be more efficiently represented
17744 // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
17745 // half vector elements.
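// E.g. (an illustrative sketch): shuffle<1,2,u,u> (concat A:v2i32, B:v2i32),
// undef --> concat (shuffle<1,2> A, B), (v2i32 undef).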
17746 if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
17747 llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
17748 IsUndefMaskElt)) {
17749 N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
17750 N0.getOperand(1),
17751 Mask.slice(0, NumElemsPerConcat));
17752 N1 = DAG.getUNDEF(ConcatVT);
17753 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
17754 }
17756 // Look at every vector that's inserted. We're looking for exact
17757 // subvector-sized copies from a concatenated vector.
17758 for (unsigned I = 0; I != NumConcats; ++I) {
17759 unsigned Begin = I * NumElemsPerConcat;
17760 ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
17762 // Make sure we're dealing with a copy.
17763 if (llvm::all_of(SubMask, IsUndefMaskElt)) {
17764 Ops.push_back(DAG.getUNDEF(ConcatVT));
17765 continue;
17766 }
17768 int OpIdx = -1;
17769 for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
17770 if (IsUndefMaskElt(SubMask[i]))
17771 continue;
17772 if ((SubMask[i] % (int)NumElemsPerConcat) != i)
17773 return SDValue();
17774 int EltOpIdx = SubMask[i] / NumElemsPerConcat;
17775 if (0 <= OpIdx && EltOpIdx != OpIdx)
17776 return SDValue();
17777 OpIdx = EltOpIdx;
17778 }
17779 assert(0 <= OpIdx && "Unknown concat_vectors op");
17781 if (OpIdx < (int)N0.getNumOperands())
17782 Ops.push_back(N0.getOperand(OpIdx));
17783 else
17784 Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
17785 }
17787 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17788 }
17790 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
17791 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
17793 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
17794 // a simplification in some sense, but it isn't appropriate in general: some
17795 // BUILD_VECTORs are substantially cheaper than others. The general case
17796 // of a BUILD_VECTOR requires inserting each element individually (or
17797 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
17798 // all constants is a single constant pool load. A BUILD_VECTOR where each
17799 // element is identical is a splat. A BUILD_VECTOR where most of the operands
17800 // are undef lowers to a small number of element insertions.
17802 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
17803 // We don't fold shuffles where one side is a non-zero constant, and we don't
17804 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
17805 // non-constant operands. This seems to work out reasonably well in practice.
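// E.g. (an illustrative sketch): shuffle<0,5,1,7> (build_vector a, b, c, d),
// (build_vector e, f, g, h) --> build_vector a, f, b, h.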
17806 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
17807 SelectionDAG &DAG,
17808 const TargetLowering &TLI) {
17809 EVT VT = SVN->getValueType(0);
17810 unsigned NumElts = VT.getVectorNumElements();
17811 SDValue N0 = SVN->getOperand(0);
17812 SDValue N1 = SVN->getOperand(1);
17814 if (!N0->hasOneUse())
17815 return SDValue();
17817 // If only one of N0,N1 is constant, bail out if it is not ALL_ZEROS as
17818 // discussed above.
17819 if (!N1.isUndef()) {
17820 if (!N1->hasOneUse())
17821 return SDValue();
17823 bool N0AnyConst = isAnyConstantBuildVector(N0);
17824 bool N1AnyConst = isAnyConstantBuildVector(N1);
17825 if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
17826 return SDValue();
17827 if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
17828 return SDValue();
17829 }
17831 // If both inputs are splats of the same value then we can safely merge this
17832 // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
17833 bool IsSplat = false;
17834 auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
17835 auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
17836 if (BV0 && BV1)
17837 if (SDValue Splat0 = BV0->getSplatValue())
17838 IsSplat = (Splat0 == BV1->getSplatValue());
17840 SmallVector<SDValue, 8> Ops;
17841 SmallSet<SDValue, 16> DuplicateOps;
17842 for (int M : SVN->getMask()) {
17843 SDValue Op = DAG.getUNDEF(VT.getScalarType());
17844 if (M >= 0) {
17845 int Idx = M < (int)NumElts ? M : M - NumElts;
17846 SDValue &S = (M < (int)NumElts ? N0 : N1);
17847 if (S.getOpcode() == ISD::BUILD_VECTOR) {
17848 Op = S.getOperand(Idx);
17849 } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
17850 SDValue Op0 = S.getOperand(0);
17851 Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
17852 } else {
17853 // Operand can't be combined - bail out.
17854 return SDValue();
17855 }
17856 }
17858 // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
17859 // generating a splat; semantically, this is fine, but it's likely to
17860 // generate low-quality code if the target can't reconstruct an appropriate
17861 // shuffle.
17862 if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
17863 if (!IsSplat && !DuplicateOps.insert(Op).second)
17864 return SDValue();
17866 Ops.push_back(Op);
17867 }
17869 // BUILD_VECTOR requires all inputs to be of the same type; find the
17870 // maximum type and extend them all.
17871 EVT SVT = VT.getScalarType();
17872 if (SVT.isInteger())
17873 for (SDValue &Op : Ops)
17874 SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
17875 if (SVT != VT.getScalarType())
17876 for (SDValue &Op : Ops)
17877 Op = TLI.isZExtFree(Op.getValueType(), SVT)
17878 ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
17879 : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
17880 return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
17881 }
17883 // Match shuffles that can be converted to ANY_EXTEND_VECTOR_INREG.
17884 // This is often generated during legalization.
17885 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_extend_vector_inreg(v4i32 src))
17886 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
17887 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
17888 SelectionDAG &DAG,
17889 const TargetLowering &TLI,
17890 bool LegalOperations) {
17891 EVT VT = SVN->getValueType(0);
17892 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17894 // TODO Add support for big-endian when we have a test case.
17895 if (!VT.isInteger() || IsBigEndian)
17896 return SDValue();
17898 unsigned NumElts = VT.getVectorNumElements();
17899 unsigned EltSizeInBits = VT.getScalarSizeInBits();
17900 ArrayRef<int> Mask = SVN->getMask();
17901 SDValue N0 = SVN->getOperand(0);
17903 // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
17904 auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
17905 for (unsigned i = 0; i != NumElts; ++i) {
17906 if (Mask[i] < 0)
17907 continue;
17908 if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
17909 continue;
17910 return false;
17911 }
17912 return true;
17913 };
17915 // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
17916 // power-of-2 extensions as they are the most likely.
17917 for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
17918 // Check for non power of 2 vector sizes
17919 if (NumElts % Scale != 0)
17920 continue;
17921 if (!isAnyExtend(Scale))
17922 continue;
17924 EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
17925 EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
17926 // Never create an illegal type. Only create unsupported operations if we
17927 // are pre-legalization.
17928 if (TLI.isTypeLegal(OutVT))
17929 if (!LegalOperations ||
17930 TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
17931 return DAG.getBitcast(VT,
17932 DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
17933 SDLoc(SVN), OutVT, N0));
17936 return SDValue();
17939 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
17940 // each source element of a large type into the lowest elements of a smaller
17941 // destination type. This is often generated during legalization.
17942 // If the source node itself was a '*_extend_vector_inreg' node then we
17943 // should be able to remove it.
17944 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
17945 SelectionDAG &DAG) {
17946 EVT VT = SVN->getValueType(0);
17947 bool IsBigEndian = DAG.getDataLayout().isBigEndian();
17949 // TODO Add support for big-endian when we have a test case.
17950 if (!VT.isInteger() || IsBigEndian)
17951 return SDValue();
17953 SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
17955 unsigned Opcode = N0.getOpcode();
17956 if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
17957 Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
17958 Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
17959 return SDValue();
17961 SDValue N00 = N0.getOperand(0);
17962 ArrayRef<int> Mask = SVN->getMask();
17963 unsigned NumElts = VT.getVectorNumElements();
17964 unsigned EltSizeInBits = VT.getScalarSizeInBits();
17965 unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
17966 unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
17968 if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
17969 return SDValue();
17970 unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
17972 // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2,-1,-1>
17973 // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
17974 // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
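// i.e. for a truncation scale S, the defined lanes must satisfy
// Mask[i] == i*S while i*S is still in range; the remaining tail lanes
// are undef.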
17975 auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
17976 for (unsigned i = 0; i != NumElts; ++i) {
17977 if (Mask[i] < 0)
17978 continue;
17979 if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
17980 continue;
17981 return false;
17983 return true;
17986 // At the moment we just handle the case where we've truncated back to the
17987 // same size as before the extension.
17988 // TODO: handle more extension/truncation cases as cases arise.
17989 if (EltSizeInBits != ExtSrcSizeInBits)
17990 return SDValue();
17992 // We can remove *extend_vector_inreg only if the truncation happens at
17993 // the same scale as the extension.
17994 if (isTruncate(ExtScale))
17995 return DAG.getBitcast(VT, N00);
17997 return SDValue();
18000 // Combine shuffles of splat-shuffles of the form:
18001 // shuffle (shuffle V, undef, splat-mask), undef, M
18002 // If splat-mask contains undef elements, we need to be careful about
18003 // introducing undef's in the folded mask which are not the result of composing
18004 // the masks of the shuffles.
18005 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18006 SelectionDAG &DAG) {
18007 if (!Shuf->getOperand(1).isUndef())
18008 return SDValue();
18009 auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18010 if (!Splat || !Splat->isSplat())
18011 return SDValue();
18013 ArrayRef<int> ShufMask = Shuf->getMask();
18014 ArrayRef<int> SplatMask = Splat->getMask();
18015 assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18017 // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18018 // every undef mask element in the splat-shuffle has a corresponding undef
18019 // element in the user-shuffle's mask or if the composition of mask elements
18020 // would result in undef.
18021 // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18022 // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18023 // In this case it is not legal to simplify to the splat-shuffle because we
18024 // may be exposing to the users of the shuffle an undef element at index 1
18025 // which was not there before the combine.
18026 // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18027 // In this case the composition of masks yields SplatMask, so it's ok to
18028 // simplify to the splat-shuffle.
18029 // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18030 // In this case the composed mask includes all undef elements of SplatMask
18031 // and in addition sets element zero to undef. It is safe to simplify to
18032 // the splat-shuffle.
18033 auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18034 ArrayRef<int> SplatMask) {
18035 for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18036 if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18037 SplatMask[UserMask[i]] != -1)
18038 return false;
18039 return true;
18041 if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18042 return Shuf->getOperand(0);
18044 // Create a new shuffle with a mask that is composed of the two shuffles'
18045 // masks.
18046 SmallVector<int, 32> NewMask;
18047 for (int Idx : ShufMask)
18048 NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
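// e.g. reusing the first example above: UserMask=[0,2,u,u] composed with
// SplatMask=[2,u,2,u] gives NewMask=[2,2,u,u], which shuffles the splat's
// own source operands directly.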
18050 return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18051 Splat->getOperand(0), Splat->getOperand(1),
18052 NewMask);
18055 /// If the shuffle mask is taking exactly one element from the first vector
18056 /// operand and passing through all other elements from the second vector
18057 /// operand, return the index of the mask element that is choosing an element
18058 /// from the first operand. Otherwise, return -1.
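/// For example (illustrative, 4 elements): Mask = <4,5,2,7> returns 2, since
/// only lane 2 chooses from operand 0 (2 < 4) while lanes 0, 1 and 3 pass
/// operand 1 through unmoved (4 == 0+4, 5 == 1+4, 7 == 3+4).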
18059 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18060 int MaskSize = Mask.size();
18061 int EltFromOp0 = -1;
18062 // TODO: This does not match if there are undef elements in the shuffle mask.
18063 // Should we ignore undefs in the shuffle mask instead? The trade-off is
18064 // removing an instruction (a shuffle), but losing the knowledge that some
18065 // vector lanes are not needed.
18066 for (int i = 0; i != MaskSize; ++i) {
18067 if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18068 // We're looking for a shuffle of exactly one element from operand 0.
18069 if (EltFromOp0 != -1)
18070 return -1;
18071 EltFromOp0 = i;
18072 } else if (Mask[i] != i + MaskSize) {
18073 // Nothing from operand 1 can change lanes.
18074 return -1;
18077 return EltFromOp0;
18080 /// If a shuffle inserts exactly one element from a source vector operand into
18081 /// another vector operand and we can access the specified element as a scalar,
18082 /// then we can eliminate the shuffle.
18083 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18084 SelectionDAG &DAG) {
18085 // First, check if we are taking one element of a vector and shuffling that
18086 // element into another vector.
18087 ArrayRef<int> Mask = Shuf->getMask();
18088 SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18089 SDValue Op0 = Shuf->getOperand(0);
18090 SDValue Op1 = Shuf->getOperand(1);
18091 int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18092 if (ShufOp0Index == -1) {
18093 // Commute mask and check again.
18094 ShuffleVectorSDNode::commuteMask(CommutedMask);
18095 ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18096 if (ShufOp0Index == -1)
18097 return SDValue();
18098 // Commute operands to match the commuted shuffle mask.
18099 std::swap(Op0, Op1);
18100 Mask = CommutedMask;
18103 // The shuffle inserts exactly one element from operand 0 into operand 1.
18104 // Now see if we can access that element as a scalar via a real insert element
18105 // instruction.
18106 // TODO: We can try harder to locate the element as a scalar. Examples: it
18107 // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18108 assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18109 "Shuffle mask value must be from operand 0");
18110 if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18111 return SDValue();
18113 auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18114 if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18115 return SDValue();
18117 // There's an existing insertelement with constant insertion index, so we
18118 // don't need to check the legality/profitability of a replacement operation
18119 // that differs at most in the constant value. The target should be able to
18120 // lower any of those in a similar way. If not, legalization will expand this
18121 // to a scalar-to-vector plus shuffle.
18123 // Note that the shuffle may move the scalar from the position that the insert
18124 // element used. Therefore, our new insert element occurs at the shuffle's
18125 // mask index value, not the insert's index value.
18126 // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
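// e.g. (illustrative, v4 types):
// shuffle (insertelt v1, x, 2), v2, <2,5,6,7> --> insertelt v2, x, 0
// because lane 0 selects the scalar inserted at index 2 of v1 and lanes 1-3
// pass v2 through unchanged.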
18127 SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18128 Op0.getOperand(2).getValueType());
18129 return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18130 Op1, Op0.getOperand(1), NewInsIndex);
18133 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18134 /// completely. This has the potential to lose undef knowledge because the first
18135 /// shuffle may not have an undef mask element where the second one does. So
18136 /// only call this after doing simplifications based on demanded elements.
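/// For example (illustrative): with inner Mask0 = <1,1,3,3> and outer mask
/// <1,0,3,2>, every outer lane selects an inner lane that holds the same
/// source element, so the outer shuffle is a no-op.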
18137 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18138 // shuf (shuf0 X, Y, Mask0), undef, Mask
18139 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18140 if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18141 return SDValue();
18143 ArrayRef<int> Mask = Shuf->getMask();
18144 ArrayRef<int> Mask0 = Shuf0->getMask();
18145 for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18146 // Ignore undef elements.
18147 if (Mask[i] == -1)
18148 continue;
18149 assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18151 // Is the element of the shuffle operand chosen by this shuffle the same as
18152 // the element chosen by the shuffle operand itself?
18153 if (Mask0[Mask[i]] != Mask0[i])
18154 return SDValue();
18156 // Every element of this shuffle is identical to the result of the previous
18157 // shuffle, so we can replace this value.
18158 return Shuf->getOperand(0);
18161 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
18162 EVT VT = N->getValueType(0);
18163 unsigned NumElts = VT.getVectorNumElements();
18165 SDValue N0 = N->getOperand(0);
18166 SDValue N1 = N->getOperand(1);
18168 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18170 // Canonicalize shuffle undef, undef -> undef
18171 if (N0.isUndef() && N1.isUndef())
18172 return DAG.getUNDEF(VT);
18174 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18176 // Canonicalize shuffle v, v -> v, undef
18177 if (N0 == N1) {
18178 SmallVector<int, 8> NewMask;
18179 for (unsigned i = 0; i != NumElts; ++i) {
18180 int Idx = SVN->getMaskElt(i);
18181 if (Idx >= (int)NumElts) Idx -= NumElts;
18182 NewMask.push_back(Idx);
18184 return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18187 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
18188 if (N0.isUndef())
18189 return DAG.getCommutedVectorShuffle(*SVN);
18191 // Remove references to rhs if it is undef
18192 if (N1.isUndef()) {
18193 bool Changed = false;
18194 SmallVector<int, 8> NewMask;
18195 for (unsigned i = 0; i != NumElts; ++i) {
18196 int Idx = SVN->getMaskElt(i);
18197 if (Idx >= (int)NumElts) {
18198 Idx = -1;
18199 Changed = true;
18201 NewMask.push_back(Idx);
18203 if (Changed)
18204 return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18207 if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18208 return InsElt;
18210 // A shuffle of a single vector that is a splatted value can always be folded.
18211 if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18212 return V;
18214 // If it is a splat, check if the argument vector is another splat or a
18215 // build_vector.
18216 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18217 int SplatIndex = SVN->getSplatIndex();
18218 if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18219 ISD::isBinaryOp(N0.getNode())) {
18220 // splat (vector_bo L, R), Index -->
18221 // splat (scalar_bo (extelt L, Index), (extelt R, Index))
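// e.g. (illustrative, v4i32 add with Index 2): the vector add of L and R is
// replaced by a single scalar add of lane 2 of each operand, and the scalar
// result is then re-splatted.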
18222 SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18223 SDLoc DL(N);
18224 EVT EltVT = VT.getScalarType();
18225 SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18226 SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18227 SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18228 SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18229 N0.getNode()->getFlags());
18230 SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
18231 SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18232 return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18235 // If this is a bit convert that changes the element type of the vector but
18236 // not the number of vector elements, look through it. Be careful not to
18237 // look through conversions that change things like v4f32 to v2f64.
18238 SDNode *V = N0.getNode();
18239 if (V->getOpcode() == ISD::BITCAST) {
18240 SDValue ConvInput = V->getOperand(0);
18241 if (ConvInput.getValueType().isVector() &&
18242 ConvInput.getValueType().getVectorNumElements() == NumElts)
18243 V = ConvInput.getNode();
18246 if (V->getOpcode() == ISD::BUILD_VECTOR) {
18247 assert(V->getNumOperands() == NumElts &&
18248 "BUILD_VECTOR has wrong number of operands");
18249 SDValue Base;
18250 bool AllSame = true;
18251 for (unsigned i = 0; i != NumElts; ++i) {
18252 if (!V->getOperand(i).isUndef()) {
18253 Base = V->getOperand(i);
18254 break;
18257 // Splat of <u, u, u, u>, return <u, u, u, u>
18258 if (!Base.getNode())
18259 return N0;
18260 for (unsigned i = 0; i != NumElts; ++i) {
18261 if (V->getOperand(i) != Base) {
18262 AllSame = false;
18263 break;
18266 // Splat of <x, x, x, x>, return <x, x, x, x>
18267 if (AllSame)
18268 return N0;
18270 // Canonicalize any other splat as a build_vector.
18271 SDValue Splatted = V->getOperand(SplatIndex);
18272 SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18273 SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18275 // We may have jumped through bitcasts, so the type of the
18276 // BUILD_VECTOR may not match the type of the shuffle.
18277 if (V->getValueType(0) != VT)
18278 NewBV = DAG.getBitcast(VT, NewBV);
18279 return NewBV;
18283 // Simplify source operands based on shuffle mask.
18284 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18285 return SDValue(N, 0);
18287 // This is intentionally placed after demanded elements simplification because
18288 // it could eliminate knowledge of undef elements created by this shuffle.
18289 if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18290 return ShufOp;
18292 // Match shuffles that can be converted to any_vector_extend_in_reg.
18293 if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18294 return V;
18296 // Combine "truncate_vector_in_reg" style shuffles.
18297 if (SDValue V = combineTruncationShuffle(SVN, DAG))
18298 return V;
18300 if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18301 Level < AfterLegalizeVectorOps &&
18302 (N1.isUndef() ||
18303 (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18304 N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18305 if (SDValue V = partitionShuffleOfConcats(N, DAG))
18306 return V;
18309 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18310 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18311 if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18312 if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18313 return Res;
18315 // If this shuffle only has a single input that is a bitcasted shuffle,
18316 // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18317 // back to their original types.
18318 if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18319 N1.isUndef() && Level < AfterLegalizeVectorOps &&
18320 TLI.isTypeLegal(VT)) {
18321 auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18322 if (Scale == 1)
18323 return SmallVector<int, 8>(Mask.begin(), Mask.end());
18325 SmallVector<int, 8> NewMask;
18326 for (int M : Mask)
18327 for (int s = 0; s != Scale; ++s)
18328 NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18329 return NewMask;
18332 SDValue BC0 = peekThroughOneUseBitcasts(N0);
18333 if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18334 EVT SVT = VT.getScalarType();
18335 EVT InnerVT = BC0->getValueType(0);
18336 EVT InnerSVT = InnerVT.getScalarType();
18338 // Determine which shuffle works with the smaller scalar type.
18339 EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
18340 EVT ScaleSVT = ScaleVT.getScalarType();
18342 if (TLI.isTypeLegal(ScaleVT) &&
18343 0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
18344 0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
18345 int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18346 int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
18348 // Scale the shuffle masks to the smaller scalar type.
18349 ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
18350 SmallVector<int, 8> InnerMask =
18351 ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
18352 SmallVector<int, 8> OuterMask =
18353 ScaleShuffleMask(SVN->getMask(), OuterScale);
18355 // Merge the shuffle masks.
18356 SmallVector<int, 8> NewMask;
18357 for (int M : OuterMask)
18358 NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
18360 // Test for shuffle mask legality over both commutations.
18361 SDValue SV0 = BC0->getOperand(0);
18362 SDValue SV1 = BC0->getOperand(1);
18363 bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18364 if (!LegalMask) {
18365 std::swap(SV0, SV1);
18366 ShuffleVectorSDNode::commuteMask(NewMask);
18367 LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
18370 if (LegalMask) {
18371 SV0 = DAG.getBitcast(ScaleVT, SV0);
18372 SV1 = DAG.getBitcast(ScaleVT, SV1);
18373 return DAG.getBitcast(
18374 VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
18380 // Canonicalize shuffles according to rules:
18381 // shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
18382 // shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
18383 // shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
18384 if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
18385 N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
18386 TLI.isTypeLegal(VT)) {
18387 // The incoming shuffle must be of the same type as the result of the
18388 // current shuffle.
18389 assert(N1->getOperand(0).getValueType() == VT &&
18390 "Shuffle types don't match");
18392 SDValue SV0 = N1->getOperand(0);
18393 SDValue SV1 = N1->getOperand(1);
18394 bool HasSameOp0 = N0 == SV0;
18395 bool IsSV1Undef = SV1.isUndef();
18396 if (HasSameOp0 || IsSV1Undef || N0 == SV1)
18397 // Commute the operands of this shuffle so that next rule
18398 // will trigger.
18399 return DAG.getCommutedVectorShuffle(*SVN);
18402 // Try to fold according to rules:
18403 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18404 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18405 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18406 // Don't try to fold shuffles with illegal type.
18407 // Only fold if this shuffle is the only user of the other shuffle.
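// e.g. (illustrative) shuffle (shuffle A, B, <0,5,2,7>), C, <1,0,3,2>
// can fold to the single node shuffle B, A, <1,4,3,6>: each outer lane is
// re-expressed directly in terms of A and B through the inner mask.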
18408 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
18409 Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
18410 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
18412 // Don't try to fold splats; they're likely to simplify somehow, or they
18413 // might be free.
18414 if (OtherSV->isSplat())
18415 return SDValue();
18417 // The incoming shuffle must be of the same type as the result of the
18418 // current shuffle.
18419 assert(OtherSV->getOperand(0).getValueType() == VT &&
18420 "Shuffle types don't match");
18422 SDValue SV0, SV1;
18423 SmallVector<int, 4> Mask;
18424 // Compute the combined shuffle mask for a shuffle with SV0 as the first
18425 // operand, and SV1 as the second operand.
18426 for (unsigned i = 0; i != NumElts; ++i) {
18427 int Idx = SVN->getMaskElt(i);
18428 if (Idx < 0) {
18429 // Propagate Undef.
18430 Mask.push_back(Idx);
18431 continue;
18434 SDValue CurrentVec;
18435 if (Idx < (int)NumElts) {
18436 // This shuffle index refers to the inner shuffle N0. Lookup the inner
18437 // shuffle mask to identify which vector is actually referenced.
18438 Idx = OtherSV->getMaskElt(Idx);
18439 if (Idx < 0) {
18440 // Propagate Undef.
18441 Mask.push_back(Idx);
18442 continue;
18445 CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
18446 : OtherSV->getOperand(1);
18447 } else {
18448 // This shuffle index references an element within N1.
18449 CurrentVec = N1;
18452 // Simple case where 'CurrentVec' is UNDEF.
18453 if (CurrentVec.isUndef()) {
18454 Mask.push_back(-1);
18455 continue;
18458 // Canonicalize the shuffle index. We don't know yet if CurrentVec
18459 // will be the first or second operand of the combined shuffle.
18460 Idx = Idx % NumElts;
18461 if (!SV0.getNode() || SV0 == CurrentVec) {
18462 // Ok. CurrentVec is the left hand side.
18463 // Update the mask accordingly.
18464 SV0 = CurrentVec;
18465 Mask.push_back(Idx);
18466 continue;
18469 // Bail out if we cannot convert the shuffle pair into a single shuffle.
18470 if (SV1.getNode() && SV1 != CurrentVec)
18471 return SDValue();
18473 // Ok. CurrentVec is the right hand side.
18474 // Update the mask accordingly.
18475 SV1 = CurrentVec;
18476 Mask.push_back(Idx + NumElts);
18479 // Check if all indices in Mask are Undef. If so, propagate Undef.
18480 bool isUndefMask = true;
18481 for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
18482 isUndefMask &= Mask[i] < 0;
18484 if (isUndefMask)
18485 return DAG.getUNDEF(VT);
18487 if (!SV0.getNode())
18488 SV0 = DAG.getUNDEF(VT);
18489 if (!SV1.getNode())
18490 SV1 = DAG.getUNDEF(VT);
18492 // Avoid introducing shuffles with illegal mask.
18493 if (!TLI.isShuffleMaskLegal(Mask, VT)) {
18494 ShuffleVectorSDNode::commuteMask(Mask);
18496 if (!TLI.isShuffleMaskLegal(Mask, VT))
18497 return SDValue();
18499 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
18500 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
18501 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
18502 std::swap(SV0, SV1);
18505 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
18506 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
18507 // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
18508 return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
18511 if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
18512 return V;
18514 return SDValue();
18517 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
18518 SDValue InVal = N->getOperand(0);
18519 EVT VT = N->getValueType(0);
18521 // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
18522 // with a VECTOR_SHUFFLE and possible truncate.
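// e.g. scalar_to_vector (extract_vector_elt V, 2) --> shuffle V, undef,
// <2,u,u,u> for a v4 type (illustrative); only lane 0 of the result is
// defined.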
18523 if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
18524 SDValue InVec = InVal->getOperand(0);
18525 SDValue EltNo = InVal->getOperand(1);
18526 auto InVecT = InVec.getValueType();
18527 if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
18528 SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
18529 int Elt = C0->getZExtValue();
18530 NewMask[0] = Elt;
18531 SDValue Val;
18532 // If we have an implicit truncate, do the truncate here as long as it's
18533 // legal; if it's not legal, don't combine here.
18534 if (VT.getScalarType() != InVal.getValueType() &&
18535 InVal.getValueType().isScalarInteger() &&
18536 isTypeLegal(VT.getScalarType())) {
18537 Val =
18538 DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
18539 return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
18541 if (VT.getScalarType() == InVecT.getScalarType() &&
18542 VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
18543 TLI.isShuffleMaskLegal(NewMask, VT)) {
18544 Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
18545 DAG.getUNDEF(InVecT), NewMask);
18546 // If the initial vector is the correct size this shuffle is a
18547 // valid result.
18548 if (VT == InVecT)
18549 return Val;
18550 // If not we must truncate the vector.
18551 if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
18552 MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
18553 SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
18554 EVT SubVT =
18555 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
18556 VT.getVectorNumElements());
18557 Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
18558 ZeroIdx);
18559 return Val;
18565 return SDValue();
18568 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
18569 EVT VT = N->getValueType(0);
18570 SDValue N0 = N->getOperand(0);
18571 SDValue N1 = N->getOperand(1);
18572 SDValue N2 = N->getOperand(2);
18574 // If inserting an UNDEF, just return the original vector.
18575 if (N1.isUndef())
18576 return N0;
18578 // If this is an insert of an extracted vector into an undef vector, we can
18579 // just use the input to the extract.
18580 if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18581 N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
18582 return N1.getOperand(0);
18584 // If we are inserting a bitcast value into an undef, with the same
18585 // number of elements, just use the bitcast input of the extract.
18586 // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
18587 // BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
18588 if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
18589 N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18590 N1.getOperand(0).getOperand(1) == N2 &&
18591 N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
18592 VT.getVectorNumElements() &&
18593 N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
18594 VT.getSizeInBits()) {
18595 return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
18598 // If both N0 and N1 are bitcast values on which insert_subvector
18599 // would make sense, pull the bitcast through.
18600 // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
18601 // BITCAST (INSERT_SUBVECTOR N0 N1 N2)
18602 if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
18603 SDValue CN0 = N0.getOperand(0);
18604 SDValue CN1 = N1.getOperand(0);
18605 EVT CN0VT = CN0.getValueType();
18606 EVT CN1VT = CN1.getValueType();
18607 if (CN0VT.isVector() && CN1VT.isVector() &&
18608 CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
18609 CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
18610 SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
18611 CN0.getValueType(), CN0, CN1, N2);
18612 return DAG.getBitcast(VT, NewINSERT);
18616 // Combine INSERT_SUBVECTORs where we are inserting to the same index.
18617 // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
18618 // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
18619 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
18620 N0.getOperand(1).getValueType() == N1.getValueType() &&
18621 N0.getOperand(2) == N2)
18622 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
18623 N1, N2);
18625 // Eliminate an intermediate insert into an undef vector:
18626 // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
18627 // insert_subvector undef, X, N2
18628 if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
18629 N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
18630 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
18631 N1.getOperand(1), N2);
18633 if (!isa<ConstantSDNode>(N2))
18634 return SDValue();
18636 unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
18638 // Canonicalize insert_subvector dag nodes.
18639 // Example:
18640 // (insert_subvector (insert_subvector A, Idx0), Idx1)
18641 // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
18642 if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
18643 N1.getValueType() == N0.getOperand(1).getValueType() &&
18644 isa<ConstantSDNode>(N0.getOperand(2))) {
18645 unsigned OtherIdx = N0.getConstantOperandVal(2);
18646 if (InsIdx < OtherIdx) {
18647 // Swap nodes.
18648 SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
18649 N0.getOperand(0), N1, N2);
18650 AddToWorklist(NewOp.getNode());
18651 return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
18652 VT, NewOp, N0.getOperand(1), N0.getOperand(2));
18656 // If the input vector is a concatenation, and the insert replaces
18657 // one of the pieces, we can optimize into a single concat_vectors.
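// e.g. (illustrative, v4 subvectors): insert_subvector (concat_vectors A, B),
// C, 4 --> concat_vectors A, C, since index 4 with Factor == 4 replaces
// piece 1.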
18658 if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
18659 N0.getOperand(0).getValueType() == N1.getValueType()) {
18660 unsigned Factor = N1.getValueType().getVectorNumElements();
18662 SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
18663 Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
18665 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18668 // Simplify source operands based on insertion.
18669 if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18670 return SDValue(N, 0);
18672 return SDValue();
18675 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
18676 SDValue N0 = N->getOperand(0);
18678 // fold (fp_to_fp16 (fp16_to_fp op)) -> op
18679 if (N0->getOpcode() == ISD::FP16_TO_FP)
18680 return N0->getOperand(0);
18682 return SDValue();
18685 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
18686 SDValue N0 = N->getOperand(0);
18688 // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
18689 if (N0->getOpcode() == ISD::AND) {
18690 ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
18691 if (AndConst && AndConst->getAPIntValue() == 0xffff) {
18692 return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
18693 N0.getOperand(0));
18697 return SDValue();
18700 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
18701 SDValue N0 = N->getOperand(0);
18702 EVT VT = N0.getValueType();
18703 unsigned Opcode = N->getOpcode();
18705 // VECREDUCE over a 1-element vector is just an extract.
18706 if (VT.getVectorNumElements() == 1) {
18707 SDLoc dl(N);
18708 SDValue Res = DAG.getNode(
18709 ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
18710 DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
18711 if (Res.getValueType() != N->getValueType(0))
18712 Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
18713 return Res;
18716 // On a boolean vector an and/or reduction is the same as a umin/umax
18717 // reduction. Convert them if the latter is legal while the former isn't.
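// e.g. with lanes known to be all-ones or all-zero (the sign-bits check
// below), umin over the lanes is all-ones iff every lane is, matching AND,
// and umax is non-zero iff any lane is, matching OR.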
18718 if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
18719 unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
18720 ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
18721 if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
18722 TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
18723 DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
18724 return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
18727 return SDValue();
18730 /// Returns a vector_shuffle if it is able to transform an AND to a vector_shuffle
18731 /// with the destination vector and a zero vector.
18732 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0> ==>
18733 /// vector_shuffle V, Zero, <0, 4, 2, 4>
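/// Masks that only clear part of an element are handled by splitting each
/// element into sub-elements, e.g. (illustrative, little-endian v2i32):
/// AND V, <0x0000ffff, 0xffffffff> becomes a v4i16 shuffle of (bitcast V)
/// and zero with mask <0, 5, 2, 3>.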
18734 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
18735 assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
18737 EVT VT = N->getValueType(0);
18738 SDValue LHS = N->getOperand(0);
18739 SDValue RHS = peekThroughBitcasts(N->getOperand(1));
18740 SDLoc DL(N);
18742 // Make sure we're not running after operation legalization where it
18743 // may have custom lowered the vector shuffles.
18744 if (LegalOperations)
18745 return SDValue();
18747 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
18748 return SDValue();
18750 EVT RVT = RHS.getValueType();
18751 unsigned NumElts = RHS.getNumOperands();
18753 // Attempt to create a valid clear mask, splitting the mask into
18754 // sub elements and checking to see if each is
18755 // all zeros or all ones - suitable for shuffle masking.
18756 auto BuildClearMask = [&](int Split) {
18757 int NumSubElts = NumElts * Split;
18758 int NumSubBits = RVT.getScalarSizeInBits() / Split;
18760 SmallVector<int, 8> Indices;
18761 for (int i = 0; i != NumSubElts; ++i) {
18762 int EltIdx = i / Split;
18763 int SubIdx = i % Split;
18764 SDValue Elt = RHS.getOperand(EltIdx);
18765 if (Elt.isUndef()) {
18766 Indices.push_back(-1);
18767 continue;
18770 APInt Bits;
18771 if (isa<ConstantSDNode>(Elt))
18772 Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
18773 else if (isa<ConstantFPSDNode>(Elt))
18774 Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
18775 else
18776 return SDValue();
18778 // Extract the sub element from the constant bit mask.
18779 if (DAG.getDataLayout().isBigEndian()) {
18780 Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
18781 } else {
18782 Bits.lshrInPlace(SubIdx * NumSubBits);
18785 if (Split > 1)
18786 Bits = Bits.trunc(NumSubBits);
18788 if (Bits.isAllOnesValue())
18789 Indices.push_back(i);
18790 else if (Bits == 0)
18791 Indices.push_back(i + NumSubElts);
18792 else
18793 return SDValue();
18796 // Let's see if the target supports this vector_shuffle.
18797 EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
18798 EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
18799 if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
18800 return SDValue();
18802 SDValue Zero = DAG.getConstant(0, DL, ClearVT);
18803 return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
18804 DAG.getBitcast(ClearVT, LHS),
18805 Zero, Indices));
18808 // Determine maximum split level (byte level masking).
18809 int MaxSplit = 1;
18810 if (RVT.getScalarSizeInBits() % 8 == 0)
18811 MaxSplit = RVT.getScalarSizeInBits() / 8;
18813 for (int Split = 1; Split <= MaxSplit; ++Split)
18814 if (RVT.getScalarSizeInBits() % Split == 0)
18815 if (SDValue S = BuildClearMask(Split))
18816 return S;
18818 return SDValue();
18821 /// If a vector binop is performed on splat values, it may be profitable to
18822 /// extract, scalarize, and insert/splat.
18823 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
18824 SDValue N0 = N->getOperand(0);
18825 SDValue N1 = N->getOperand(1);
18826 unsigned Opcode = N->getOpcode();
18827 EVT VT = N->getValueType(0);
18828 EVT EltVT = VT.getVectorElementType();
18829 const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18831 // TODO: Remove/replace the extract cost check? If the elements are available
18832 // as scalars, then there may be no extract cost. Should we ask if
18833 // inserting a scalar back into a vector is cheap instead?
18834 int Index0, Index1;
18835 SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
18836 SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
18837 if (!Src0 || !Src1 || Index0 != Index1 ||
18838 Src0.getValueType().getVectorElementType() != EltVT ||
18839 Src1.getValueType().getVectorElementType() != EltVT ||
18840 !TLI.isExtractVecEltCheap(VT, Index0) ||
18841 !TLI.isOperationLegalOrCustom(Opcode, EltVT))
18842 return SDValue();
18844 SDLoc DL(N);
18845 SDValue IndexC =
18846 DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
18847 SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
18848 SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
18849 SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
18851 // If all lanes but 1 are undefined, no need to splat the scalar result.
18852 // TODO: Keep track of undefs and use that info in the general case.
18853 if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
18854 count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
18855 count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
18856 // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
18857 // build_vec ..undef, (bo X, Y), undef...
18858 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
18859 Ops[Index0] = ScalarBO;
18860 return DAG.getBuildVector(VT, DL, Ops);
18863 // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
18864 SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
18865 return DAG.getBuildVector(VT, DL, Ops);
18868 /// Visit a binary vector operation, like ADD.
18869 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
18870 assert(N->getValueType(0).isVector() &&
18871 "SimplifyVBinOp only works on vectors!");
18873 SDValue LHS = N->getOperand(0);
18874 SDValue RHS = N->getOperand(1);
18875 SDValue Ops[] = {LHS, RHS};
18876 EVT VT = N->getValueType(0);
18877 unsigned Opcode = N->getOpcode();
18879 // See if we can constant fold the vector operation.
18880 if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
18881 Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
18882 return Fold;
18884 // Move unary shuffles with identical masks after a vector binop:
18885 // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
18886 // --> shuffle (VBinOp A, B), Undef, Mask
18887 // This does not require type legality checks because we are creating the
18888 // same types of operations that are in the original sequence. We do have to
18889 // restrict ops like integer div that have immediate UB (eg, div-by-zero)
18890 // though. This code is adapted from the identical transform in instcombine.
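// e.g. add (shuffle A, undef, <1,0>), (shuffle B, undef, <1,0>)
// --> shuffle (add A, B), undef, <1,0>, leaving one shuffle instead of two.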
18891 if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
18892 Opcode != ISD::UREM && Opcode != ISD::SREM &&
18893 Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
18894 auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
18895 auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
18896 if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
18897 LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
18898 (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
18899 SDLoc DL(N);
18900 SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
18901 RHS.getOperand(0), N->getFlags());
18902 SDValue UndefV = LHS.getOperand(1);
18903 return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
18907 // The following pattern is likely to emerge with vector reduction ops. Moving
18908 // the binary operation ahead of insertion may allow using a narrower vector
18909 // instruction that has better performance than the wide version of the op:
18910 // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
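// e.g. (illustrative, v8i32 built from v4i32 halves):
// add (ins undef, X, 0), (ins undef, Y, 0)
// --> ins (add undef, undef), (add X, Y), 0, where only the narrow v4i32
// add does real work and the wide add of undefs is computed separately
// (it may fold away).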
18911 if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
18912 RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
18913 LHS.getOperand(2) == RHS.getOperand(2) &&
18914 (LHS.hasOneUse() || RHS.hasOneUse())) {
18915 SDValue X = LHS.getOperand(1);
18916 SDValue Y = RHS.getOperand(1);
18917 SDValue Z = LHS.getOperand(2);
18918 EVT NarrowVT = X.getValueType();
18919 if (NarrowVT == Y.getValueType() &&
18920 TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
18921 // (binop undef, undef) may not return undef, so compute that result.
18922 SDLoc DL(N);
18923 SDValue VecC =
18924 DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
18925 SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
18926 return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
18930 if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
18931 return V;
18933 return SDValue();
18936 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
18937 SDValue N2) {
18938 assert(N0.getOpcode() == ISD::SETCC && "First argument must be a SetCC node!");
18940 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
18941 cast<CondCodeSDNode>(N0.getOperand(2))->get());
18943 // If we got a simplified select_cc node back from SimplifySelectCC, then
18944 // break it down into a new SETCC node, and a new SELECT node, and then return
18945 // the SELECT node, since we were called with a SELECT node.
18946 if (SCC.getNode()) {
18947 // Check to see if we got a select_cc back (to turn into setcc/select).
18948 // Otherwise, just return whatever node we got back, like fabs.
18949 if (SCC.getOpcode() == ISD::SELECT_CC) {
18950 SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
18951 N0.getValueType(),
18952 SCC.getOperand(0), SCC.getOperand(1),
18953 SCC.getOperand(4));
18954 AddToWorklist(SETCC.getNode());
18955 return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
18956 SCC.getOperand(2), SCC.getOperand(3));
18959 return SCC;
18961 return SDValue();
18964 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
18965 /// being selected between, see if we can simplify the select. Callers of this
18966 /// should assume that TheSelect is deleted if this returns true. As such, they
18967 /// should return the appropriate thing (e.g. the node) back to the top-level of
18968 /// the DAG combiner loop to avoid it being looked at.
18969 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
18970 SDValue RHS) {
18971 // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18972 // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
18973 if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
18974 if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
18975 // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
18976 SDValue Sqrt = RHS;
18977 ISD::CondCode CC;
18978 SDValue CmpLHS;
18979 const ConstantFPSDNode *Zero = nullptr;
18981 if (TheSelect->getOpcode() == ISD::SELECT_CC) {
18982 CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
18983 CmpLHS = TheSelect->getOperand(0);
18984 Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
18985 } else {
18986 // SELECT or VSELECT
18987 SDValue Cmp = TheSelect->getOperand(0);
18988 if (Cmp.getOpcode() == ISD::SETCC) {
18989 CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
18990 CmpLHS = Cmp.getOperand(0);
18991 Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
18994 if (Zero && Zero->isZero() &&
18995 Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
18996 CC == ISD::SETULT || CC == ISD::SETLT)) {
18997 // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
18998 CombineTo(TheSelect, Sqrt);
18999 return true;
19003 // Cannot simplify select with vector condition
19004 if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19006 // If this is a select from two identical things, try to pull the operation
19007 // through the select.
19008 if (LHS.getOpcode() != RHS.getOpcode() ||
19009 !LHS.hasOneUse() || !RHS.hasOneUse())
19010 return false;
19012 // If this is a load and the token chain is identical, replace the select
19013 // of two loads with a load through a select of the address to load from.
19014 // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19015 // constants have been dropped into the constant pool.
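// e.g. select C, (load p), (load q) --> load (select C, p, q), provided the
// chains match and the safety checks below all pass.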
19016 if (LHS.getOpcode() == ISD::LOAD) {
19017 LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19018 LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19020 // Token chains must be identical.
19021 if (LHS.getOperand(0) != RHS.getOperand(0) ||
19022 // Do not let this transformation reduce the number of volatile loads.
19023 LLD->isVolatile() || RLD->isVolatile() ||
19024 // FIXME: If either is a pre/post inc/dec load,
19025 // we'd need to split out the address adjustment.
19026 LLD->isIndexed() || RLD->isIndexed() ||
19027 // If this is an EXTLOAD, the VT's must match.
19028 LLD->getMemoryVT() != RLD->getMemoryVT() ||
19029 // If this is an EXTLOAD, the kind of extension must match.
19030 (LLD->getExtensionType() != RLD->getExtensionType() &&
19031 // The only exception is if one of the extensions is anyext.
19032 LLD->getExtensionType() != ISD::EXTLOAD &&
19033 RLD->getExtensionType() != ISD::EXTLOAD) ||
19034 // FIXME: this discards src value information. This is
19035 // over-conservative. It would be beneficial to be able to remember
19036 // both potential memory locations. Since we are discarding
19037 // src value info, don't do the transformation if the memory
19038 // locations are not in the default address space.
19039 LLD->getPointerInfo().getAddrSpace() != 0 ||
19040 RLD->getPointerInfo().getAddrSpace() != 0 ||
19041 // We can't produce a CMOV of a TargetFrameIndex since we won't
19042 // generate the address generation required.
19043 LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19044 RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19045 !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19046 LLD->getBasePtr().getValueType()))
19047 return false;
19049 // The loads must not depend on one another.
19050 if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19051 return false;
19053 // Check that the select condition doesn't reach either load. If so,
19054 // folding this will induce a cycle into the DAG. If not, this is safe to
19055 // xform, so create a select of the addresses.
19057 SmallPtrSet<const SDNode *, 32> Visited;
19058 SmallVector<const SDNode *, 16> Worklist;
19060 // Always fail if LLD and RLD are not independent. TheSelect is a
19061 // predecessor to all Nodes in question so we need not search past it.
19063 Visited.insert(TheSelect);
19064 Worklist.push_back(LLD);
19065 Worklist.push_back(RLD);
19067 if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19068 SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19069 return false;
19071 SDValue Addr;
19072 if (TheSelect->getOpcode() == ISD::SELECT) {
19073 // We cannot do this optimization if any pair of {RLD, LLD} is a
19074 // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19075 // Loads, we only need to check if CondNode is a successor to one of the
19076 // loads. We can further avoid this if there's no use of their chain
19077 // value.
19078 SDNode *CondNode = TheSelect->getOperand(0).getNode();
19079 Worklist.push_back(CondNode);
19081 if ((LLD->hasAnyUseOfValue(1) &&
19082 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19083 (RLD->hasAnyUseOfValue(1) &&
19084 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19085 return false;
19087 Addr = DAG.getSelect(SDLoc(TheSelect),
19088 LLD->getBasePtr().getValueType(),
19089 TheSelect->getOperand(0), LLD->getBasePtr(),
19090 RLD->getBasePtr());
19091 } else { // Otherwise SELECT_CC
19092 // We cannot do this optimization if any pair of {RLD, LLD} is a
19093 // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19094 // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19095 // one of the loads. We can further avoid this if there's no use of their
19096 // chain value.
19098 SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19099 SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19100 Worklist.push_back(CondLHS);
19101 Worklist.push_back(CondRHS);
19103 if ((LLD->hasAnyUseOfValue(1) &&
19104 SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19105 (RLD->hasAnyUseOfValue(1) &&
19106 SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19107 return false;
19109 Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19110 LLD->getBasePtr().getValueType(),
19111 TheSelect->getOperand(0),
19112 TheSelect->getOperand(1),
19113 LLD->getBasePtr(), RLD->getBasePtr(),
19114 TheSelect->getOperand(4));
19117 SDValue Load;
19118 // It is safe to replace the two loads if they have different alignments,
19119 // but the new load must be the minimum (most restrictive) alignment of the
19120 // inputs.
19121 unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19122 MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19123 if (!RLD->isInvariant())
19124 MMOFlags &= ~MachineMemOperand::MOInvariant;
19125 if (!RLD->isDereferenceable())
19126 MMOFlags &= ~MachineMemOperand::MODereferenceable;
19127 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19128 // FIXME: Discards pointer and AA info.
19129 Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19130 LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19131 MMOFlags);
19132 } else {
19133 // FIXME: Discards pointer and AA info.
19134 Load = DAG.getExtLoad(
19135 LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19136 : LLD->getExtensionType(),
19137 SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19138 MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19141 // Users of the select now use the result of the load.
19142 CombineTo(TheSelect, Load);
19144 // Users of the old loads now use the new load's chain. We know the
19145 // old-load value is dead now.
19146 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19147 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19148 return true;
19151 return false;
19154 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19155 /// bitwise 'and'.
19156 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19157 SDValue N1, SDValue N2, SDValue N3,
19158 ISD::CondCode CC) {
19159 // If this is a select where the false operand is zero and the compare is a
19160 // check of the sign bit, see if we can perform the "gzip trick":
19161 // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19162 // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
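// e.g. (i32, illustrative): for select_cc setlt X, 0, A, 0, the shift
// sra X, 31 is all-ones exactly when X is negative, so the AND yields A for
// X < 0 and 0 otherwise.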
19163 EVT XType = N0.getValueType();
19164 EVT AType = N2.getValueType();
19165 if (!isNullConstant(N3) || !XType.bitsGE(AType))
19166 return SDValue();
19168 // If the comparison is testing for a positive value, we have to invert
19169 // the sign bit mask, so only do that transform if the target has a bitwise
19170 // 'and not' instruction (the invert is free).
19171 if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19172 // (X > -1) ? A : 0
19173 // (X > 0) ? X : 0 <-- This is canonical signed max.
19174 if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19175 return SDValue();
19176 } else if (CC == ISD::SETLT) {
19177 // (X < 0) ? A : 0
19178 // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
19179 if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19180 return SDValue();
19181 } else {
19182 return SDValue();
19185 // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19186 // constant.
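// e.g. (i32, illustrative) with A = 8 (only bit 3 set): the sign bit must
// land on bit 3, so the shift amount below is 32 - log2(8) - 1 = 28.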
19187 EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19188 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19189 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19190 unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19191 SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19192 SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19193 AddToWorklist(Shift.getNode());
19195 if (XType.bitsGT(AType)) {
19196 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19197 AddToWorklist(Shift.getNode());
19200 if (CC == ISD::SETGT)
19201 Shift = DAG.getNOT(DL, Shift, AType);
19203 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19206 SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19207 SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19208 AddToWorklist(Shift.getNode());
19210 if (XType.bitsGT(AType)) {
19211 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19212 AddToWorklist(Shift.getNode());
19215 if (CC == ISD::SETGT)
19216 Shift = DAG.getNOT(DL, Shift, AType);
19218 return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19221 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
19222 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19223 /// in it. This may be a win when the constant is not otherwise available
19224 /// because it replaces two constant pool loads with one.
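/// e.g. (illustrative, f32): {2.0f, 1.0f} is emitted as a single constant-pool
/// array and the compare merely selects the load offset (4 or 0) into it.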
19225 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19226 const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19227 ISD::CondCode CC) {
19228 if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
19229 return SDValue();
19231 // If we are before legalize types, we want the other legalization to happen
19232 // first (for example, to avoid messing with soft float).
19233 auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19234 auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19235 EVT VT = N2.getValueType();
19236 if (!TV || !FV || !TLI.isTypeLegal(VT))
19237 return SDValue();
19239 // If a constant can be materialized without loads, this does not make sense.
19240 if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19241 TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19242 TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19243 return SDValue();
19245 // If both constants have multiple uses, then we won't need to do an extra
19246 // load. The values are likely around in registers for other users.
19247 if (!TV->hasOneUse() && !FV->hasOneUse())
19248 return SDValue();
19250 Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19251 const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19252 Type *FPTy = Elts[0]->getType();
19253 const DataLayout &TD = DAG.getDataLayout();
19255 // Create a ConstantArray of the two constants.
19256 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19257 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19258 TD.getPrefTypeAlignment(FPTy));
19259 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19261 // Get offsets to the 0 and 1 elements of the array, so we can select between
19262 // them.
19263 SDValue Zero = DAG.getIntPtrConstant(0, DL);
19264 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19265 SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19266 SDValue Cond =
19267 DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19268 AddToWorklist(Cond.getNode());
19269 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19270 AddToWorklist(CstOffset.getNode());
19271 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19272 AddToWorklist(CPIdx.getNode());
19273 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19274 MachinePointerInfo::getConstantPool(
19275 DAG.getMachineFunction()), Alignment);
19278 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
19279 /// where 'cond' is the comparison specified by CC.
19280 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
19281 SDValue N2, SDValue N3, ISD::CondCode CC,
19282 bool NotExtCompare) {
19283 // (x ? y : y) -> y.
19284 if (N2 == N3) return N2;
19286 EVT CmpOpVT = N0.getValueType();
19287 EVT CmpResVT = getSetCCResultType(CmpOpVT);
19288 EVT VT = N2.getValueType();
19289 auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
19290 auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19291 auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
19293 // Determine if the condition we're dealing with is constant.
19294 if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
19295 AddToWorklist(SCC.getNode());
19296 if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
19297 // fold select_cc true, x, y -> x
19298 // fold select_cc false, x, y -> y
19299 return !(SCCC->isNullValue()) ? N2 : N3;
19303 if (SDValue V =
19304 convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
19305 return V;
19307 if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
19308 return V;
19310 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
19311 // where y has a single bit set.
19312 // A plaintext description would be: we can turn the SELECT_CC into an AND
19313 // when the condition can be materialized as an all-ones register. Any
19314 // single bit-test can be materialized as an all-ones register with
19315 // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
    SDValue AndLHS = N0->getOperand(0);
    auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      const APInt &AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
          DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
                          getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);

      // Now arithmetically right shift it all the way over, so the result is
      // either all-ones or zero.
      SDValue ShrAmt =
          DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
                          getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }
  // fold select C, 16, 0 -> shl C, 4
  bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
  bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();

  if ((Fold || Swap) &&
      TLI.getBooleanContents(CmpOpVT) ==
          TargetLowering::ZeroOrOneBooleanContent &&
      (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {

    if (Swap) {
      CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
      std::swap(N2C, N3C);
    }

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->isOne())
      return SDValue();

    SDValue Temp, SCC;
    // zext (setcc n0, n1)
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
      if (VT.bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    } else {
      SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
    }

    AddToWorklist(SCC.getNode());
    AddToWorklist(Temp.getNode());

    if (N2C->isOne())
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       SDLoc(Temp),
                                       getShiftAmountTy(Temp.getValueType())));
  }
  // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
  // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
  // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
  // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
  // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
  // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    SDValue ValueOnZero = N2;
    SDValue Count = N3;
    // If the condition is SETNE instead of SETEQ, swap the operands.
    if (CC == ISD::SETNE)
      std::swap(ValueOnZero, Count);
    // Check if the value on zero is a constant equal to the bits in the type.
    if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
      if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
        // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
        // legal, combine to just cttz.
        if ((Count.getOpcode() == ISD::CTTZ ||
             Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
          return DAG.getNode(ISD::CTTZ, DL, VT, N0);
        // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
        // legal, combine to just ctlz.
        if ((Count.getOpcode() == ISD::CTLZ ||
             Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
            N0 == Count.getOperand(0) &&
            (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
          return DAG.getNode(ISD::CTLZ, DL, VT, N0);
      }
    }
  }

  return SDValue();
}
/// This is a stub for TargetLowering::SimplifySetCC.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                                   ISD::CondCode Cond, const SDLoc &DL,
                                   bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, Level, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}
/// Given an ISD::SDIV node expressing a divide by constant, return
/// a DAG expression to select that will generate the same value by multiplying
/// by a magic number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
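/// For example (i32, illustrative): for a divide by 3, the magic number is
/// 0x55555556, and x/3 becomes roughly
///   q = (int32_t)(((int64_t)0x55555556 * x) >> 32);
///   q += (uint32_t)x >> 31; // round toward zero for negative x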
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::SDIV node expressing a divide by a constant power of 2,
/// return a DAG expression that will generate the same value by right
/// shifting.
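/// For example (i32, illustrative): x/8 becomes
///   (x + ((x >> 31) >>u 29)) >> 3
/// where the added term rounds negative values toward zero.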
SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
  ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
  if (!C)
    return SDValue();

  // Avoid division by zero.
  if (C->isNullValue())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Given an ISD::UDIV node expressing a divide by constant, return a DAG
/// expression that will generate the same value by multiplying by a magic
/// number.
/// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
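/// For example (i32, illustrative): for an unsigned divide by 3, the magic
/// number is 0xAAAAAAAB, and x/3 becomes roughly
///   q = (uint32_t)(((uint64_t)0xAAAAAAAB * x) >> 33);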
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  // When optimising for minimum size, we don't want to expand a div to a mul
  // and a shift.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  SmallVector<SDNode *, 8> Built;
  if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
    for (SDNode *N : Built)
      AddToWorklist(N);
    return S;
  }

  return SDValue();
}
/// Determines the LogBase2 value for a non-null input value using the
/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
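/// For example (i32, illustrative): for V == 16, ctlz(16) == 27 and
/// LogBase2(16) == 31 - 27 == 4.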
SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
  EVT VT = V.getValueType();
  unsigned EltBits = VT.getScalarSizeInBits();
  SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
  SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
  SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
  return LogBase2;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal, we need to find the zero of the function:
///   F(X) = 1/X - A [which has a zero at X = 1/A]
/// =>
///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
///   does not require additional intermediate precision]
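/// For example (illustrative): with A == 3 and initial estimate X_0 == 0.3,
/// X_1 == 0.3 * (2 - 0.9) == 0.33 and X_2 == 0.33 * (2 - 0.99) == 0.3333,
/// converging quadratically towards 1/3.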
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getDivRefinementSteps(VT, MF);
  if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      SDLoc DL(Op);
      SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);

      // Newton iterations: Est = Est + Est (1 - Arg * Est)
      for (int i = 0; i < Iterations; ++i) {
        SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
        AddToWorklist(NewEst.getNode());

        Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
        AddToWorklist(Est.getNode());
      }
    }
    return Est;
  }

  return SDValue();
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
/// As a result, we precompute A/2 prior to the iteration loop.
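/// For example (illustrative): with A == 4 and initial estimate X_0 == 0.6,
/// X_1 == 0.6 * (1.5 - 2.0 * 0.36) == 0.468, already close to
/// 1/sqrt(4) == 0.5.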
SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);

  // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
  // this entire sequence requires only one FP constant.
  SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
  AddToWorklist(HalfArg.getNode());

  // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
    AddToWorklist(NewEst.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
    AddToWorklist(Est.getNode());
  }

  // If non-reciprocal square root is requested, multiply the result by Arg.
  if (!Reciprocal) {
    Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
/// For the reciprocal sqrt, we need to find the zero of the function:
///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
/// =>
///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
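/// For example (illustrative): with A == 4 and X_0 == 0.6,
/// X_1 == (-0.5 * 0.6) * (4 * 0.36 - 3.0) == (-0.3) * (-1.56) == 0.468,
/// matching the one-constant form above.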
SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
                                         unsigned Iterations,
                                         SDNodeFlags Flags, bool Reciprocal) {
  EVT VT = Arg.getValueType();
  SDLoc DL(Arg);
  SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
  SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);

  // This routine must enter the loop below to work correctly
  // when (Reciprocal == false).
  assert(Iterations > 0);

  // Newton iterations for reciprocal square root:
  // E = (E * -0.5) * ((A * E) * E + -3.0)
  for (unsigned i = 0; i < Iterations; ++i) {
    SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
    AddToWorklist(AE.getNode());

    SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
    AddToWorklist(AEE.getNode());

    SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
    AddToWorklist(RHS.getNode());

    // When calculating a square root at the last iteration build:
    // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
    // (notice a common subexpression)
    SDValue LHS;
    if (Reciprocal || (i + 1) < Iterations) {
      // RSQRT: LHS = (E * -0.5)
      LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
    } else {
      // SQRT: LHS = (A * E) * -0.5
      LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
    }
    AddToWorklist(LHS.getNode());

    Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
    AddToWorklist(Est.getNode());
  }

  return Est;
}
/// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
/// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed
/// if Op can be zero.
SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
                                           bool Reciprocal) {
  if (Level >= AfterLegalizeDAG)
    return SDValue();

  // TODO: Handle half and/or extended types?
  EVT VT = Op.getValueType();
  if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
    return SDValue();

  // If estimates are explicitly disabled for this function, we're done.
  MachineFunction &MF = DAG.getMachineFunction();
  int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
  if (Enabled == TLI.ReciprocalEstimate::Disabled)
    return SDValue();

  // Estimates may be explicitly enabled for this type with a custom number of
  // refinement steps.
  int Iterations = TLI.getSqrtRefinementSteps(VT, MF);

  bool UseOneConstNR = false;
  if (SDValue Est =
          TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
                              Reciprocal)) {
    AddToWorklist(Est.getNode());

    if (Iterations) {
      Est = UseOneConstNR
                ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
                : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);

      if (!Reciprocal) {
        // The estimate is now completely wrong if the input was exactly 0.0 or
        // possibly a denormal. Force the answer to 0.0 for those cases.
        SDLoc DL(Op);
        EVT CCVT = getSetCCResultType(VT);
        ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
        const Function &F = DAG.getMachineFunction().getFunction();
        Attribute Denorms = F.getFnAttribute("denormal-fp-math");
        if (Denorms.getValueAsString().equals("ieee")) {
          // fabs(X) < SmallestNormal ? 0.0 : Est
          const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
          APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
          SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
          SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
          Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
          AddToWorklist(Fabs.getNode());
          AddToWorklist(IsDenorm.getNode());
          AddToWorklist(Est.getNode());
        } else {
          // X == 0.0 ? 0.0 : Est
          SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
          SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
          Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
          AddToWorklist(IsZero.getNode());
          AddToWorklist(Est.getNode());
        }
      }
    }
    return Est;
  }

  return SDValue();
}
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, true);
}

SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
  return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {

  struct MemUseCharacteristics {
    bool IsVolatile;
    SDValue BasePtr;
    int64_t Offset;
    Optional<int64_t> NumBytes;
    MachineMemOperand *MMO;
  };

  auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
    if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
      int64_t Offset = 0;
      if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
        Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
                     ? C->getSExtValue()
                     : (LSN->getAddressingMode() == ISD::PRE_DEC)
                           ? -1 * C->getSExtValue()
                           : 0;
      return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
              Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
              LSN->getMemOperand()};
    }
    if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
      return {false /*isVolatile*/, LN->getOperand(1),
              (LN->hasOffset()) ? LN->getOffset() : 0,
              (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
                                : Optional<int64_t>(),
              (MachineMemOperand *)nullptr};
    // Default.
    return {false /*isVolatile*/, SDValue(), (int64_t)0 /*offset*/,
            Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
  };

  MemUseCharacteristics MUC0 = getCharacteristics(Op0),
                        MUC1 = getCharacteristics(Op1);

  // If they are to the same address, then they must be aliases.
  if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
      MUC0.Offset == MUC1.Offset)
    return true;

  // If they are both volatile then they cannot be reordered.
  if (MUC0.IsVolatile && MUC1.IsVolatile)
    return true;

  if (MUC0.MMO && MUC1.MMO) {
    if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
        (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
      return false;
  }

  bool IsAlias;
  if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
                                       DAG, IsAlias) &&
      !IsAlias)
    return IsAlias;

  // The following all rely on MMO0 and MMO1 being valid. Fail conservatively
  // if either is not known.
  if (!MUC0.MMO || !MUC1.MMO)
    return true;

  // If one operation reads from invariant memory, and the other may store,
  // they cannot alias. These should really be checking the equivalent of
  // mayWrite, but it only matters for memory nodes other than load/store.
  if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
      (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
    return false;

  // If we know the required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias. This check is conservative for now to catch
  // cases created by splitting vector types.
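  // For example (illustrative): two 4-byte accesses whose base alignment is 8
  // and whose SrcValue offsets are 0 and 4 get OffAlign values 0 and 4, so
  // (0 + 4) <= 4 proves they cannot overlap.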
  int64_t SrcValOffset0 = MUC0.MMO->getOffset();
  int64_t SrcValOffset1 = MUC1.MMO->getOffset();
  unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
  unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
  if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
      MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
      *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
    int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
    int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;

    // There is no overlap between these relatively aligned accesses of
    // similar size. Return no alias.
    if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
        (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
      return false;
  }

  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif

  if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
    int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
    int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
    AliasResult AAResult = AA->alias(
        MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                       UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
        MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                       UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;    // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited; // Visited node set.

  // Get alias information for node.
  const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Attempt to improve chain by a single step.
  std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
    switch (C.getOpcode()) {
    case ISD::EntryToken:
      // No need to mark EntryToken.
      C = SDValue();
      return true;
    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for C.
      bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
                      !cast<LSBaseSDNode>(C.getNode())->isVolatile();
      if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      // Alias, so stop here.
      return false;
    }

    case ISD::CopyFromReg:
      // Always forward past CopyFromReg.
      C = C.getOperand(0);
      return true;

    case ISD::LIFETIME_START:
    case ISD::LIFETIME_END: {
      // We can forward past any lifetime start/end that can be proven not to
      // alias the memory access.
      if (!isAlias(N, C.getNode())) {
        // Look further up the chain.
        C = C.getOperand(0);
        return true;
      }
      return false;
    }
    default:
      return false;
    }
  };

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // Don't bother if we've seen Chain before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    if (Chain.getOpcode() == ISD::TokenFactor) {
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE).
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        continue;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      continue;
    }
    // Everything else.
    if (ImproveChain(Chain)) {
      // Updated chain found; consider the new chain if one exists.
      if (Chain.getNode())
        Chains.push_back(Chain);
      ++Depth;
      continue;
    }
    // No improved chain possible, so treat it as an alias.
    Aliases.push_back(Chain);
  }
}
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node).
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  if (OptLevel == CodeGenOpt::None)
    return OldChain;

  // Ops for replacing token factor.
  SmallVector<SDValue, 8> Aliases;

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it. We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getTokenFactor(SDLoc(N), Aliases);
}
namespace {

// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }

} // namespace
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
// redundant, as this function gets called when visiting every store
// node, so why not let the work be done on each store as it's visited?
//
// I believe this is mainly important because MergeConsecutiveStores
// is unable to deal with merging stores of different sizes, so unless
// we improve the chains of all the potential candidates up-front
// before running MergeConsecutiveStores, it might only see some of
// the nodes that will eventually be candidates, and then not be able
// to go from a partially-merged state to the desired final
// fully-merged state.
bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
  SmallVector<StoreSDNode *, 8> ChainedStores;
  StoreSDNode *STChain = St;
  // Intervals records which offsets from BaseIndex have been covered. In the
  // common case, each store encountered writes immediately before the range
  // already covered, and is thus merged with the existing interval at
  // insertion time.
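  // For example (illustrative): 4-byte stores at offsets 8, 4 and then 0 grow
  // a single interval [8, 12) -> [4, 12) -> [0, 12) instead of creating three
  // separate intervals.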
  using IMap =
      llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
  IMap::Allocator A;
  IMap Intervals(A);

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Add ST's interval.
  Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);

  while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (!SDValue(Chain, 0)->hasOneUse())
      break;
    if (Chain->isVolatile() || Chain->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
    // Check that the base pointer is the same as the original one.
    int64_t Offset;
    if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
      break;
    int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
    // Make sure we don't overlap with other intervals by checking the ones to
    // the left or right before inserting.
    auto I = Intervals.find(Offset);
    // If there's a next interval, we should end before it.
    if (I != Intervals.end() && I.start() < (Offset + Length))
      break;
    // If there's a previous interval, we should start after it.
    if (I != Intervals.begin() && (--I).stop() <= Offset)
      break;
    Intervals.insert(Offset, Offset + Length, Unit);

    ChainedStores.push_back(Chain);
    STChain = Chain;
  }

  // If we didn't find a chained store, exit.
  if (ChainedStores.size() == 0)
    return false;

  // Improve all chained stores (St and ChainedStores members) starting from
  // where the store chain ended and return a single TokenFactor.
  SDValue NewChain = STChain->getChain();
  SmallVector<SDValue, 8> TFOps;
  for (unsigned I = ChainedStores.size(); I;) {
    StoreSDNode *S = ChainedStores[--I];
    SDValue BetterChain = FindBetterChain(S, NewChain);
    S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
        S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
    TFOps.push_back(SDValue(S, 0));
    ChainedStores[I] = S;
  }

  // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
  SDValue BetterChain = FindBetterChain(St, NewChain);
  SDValue NewST;
  if (St->isTruncatingStore())
    NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
                              St->getBasePtr(), St->getMemoryVT(),
                              St->getMemOperand());
  else
    NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
                         St->getBasePtr(), St->getMemOperand());

  TFOps.push_back(NewST);

  // If we improved every element of TFOps, then we've lost the dependence on
  // NewChain to successors of St and we need to add it back to TFOps. Do so at
  // the beginning to keep relative order consistent with FindBetterChains.
  auto hasImprovedChain = [&](SDValue ST) -> bool {
    return ST->getOperand(0) != NewChain;
  };
  bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
  if (AddNewChain)
    TFOps.insert(TFOps.begin(), NewChain);

  SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
  CombineTo(St, TF);

  AddToWorklist(STChain);
  // Add TF's operands to the worklist in reverse order.
  for (auto I = TF->getNumOperands(); I;)
    AddToWorklist(TF->getOperand(--I).getNode());
  AddToWorklist(TF.getNode());
  return true;
}
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);

  // We must have a base and an offset.
  if (!BasePtr.getBase().getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.getBase().isUndef())
    return false;

  // Directly improve a chain of disjoint stores starting at St.
  if (parallelizeChainedStores(St))
    return true;

  // Improve St's chain.
  SDValue BetterChain = FindBetterChain(St, St->getChain());
  if (St->getChain() != BetterChain) {
    replaceStoreChain(St, BetterChain);
    return true;
  }
  return false;
}
/// This is the entry point for the file.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
                           CodeGenOpt::Level OptLevel) {
  /// This is the main entry point to this class.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}