lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallPtrSet.h"
  28 #include "llvm/ADT/SmallSet.h"
  29 #include "llvm/ADT/SmallVector.h"
  30 #include "llvm/ADT/Statistic.h"
  31 #include "llvm/Analysis/AliasAnalysis.h"
  32 #include "llvm/Analysis/MemoryLocation.h"
  33 #include "llvm/CodeGen/DAGCombine.h"
  34 #include "llvm/CodeGen/ISDOpcodes.h"
  35 #include "llvm/CodeGen/MachineFrameInfo.h"
  36 #include "llvm/CodeGen/MachineFunction.h"
  37 #include "llvm/CodeGen/MachineMemOperand.h"
  38 #include "llvm/CodeGen/RuntimeLibcalls.h"
  39 #include "llvm/CodeGen/SelectionDAG.h"
  40 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  41 #include "llvm/CodeGen/SelectionDAGNodes.h"
  42 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  43 #include "llvm/CodeGen/TargetLowering.h"
  44 #include "llvm/CodeGen/TargetRegisterInfo.h"
  45 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  46 #include "llvm/CodeGen/ValueTypes.h"
  47 #include "llvm/IR/Attributes.h"
  48 #include "llvm/IR/Constant.h"
  49 #include "llvm/IR/DataLayout.h"
  50 #include "llvm/IR/DerivedTypes.h"
  51 #include "llvm/IR/Function.h"
  52 #include "llvm/IR/LLVMContext.h"
  53 #include "llvm/IR/Metadata.h"
  54 #include "llvm/Support/Casting.h"
  55 #include "llvm/Support/CodeGen.h"
  56 #include "llvm/Support/CommandLine.h"
  57 #include "llvm/Support/Compiler.h"
  58 #include "llvm/Support/Debug.h"
  59 #include "llvm/Support/ErrorHandling.h"
  60 #include "llvm/Support/KnownBits.h"
  61 #include "llvm/Support/MachineValueType.h"
  62 #include "llvm/Support/MathExtras.h"
  63 #include "llvm/Support/raw_ostream.h"
  64 #include "llvm/Target/TargetMachine.h"
  65 #include "llvm/Target/TargetOptions.h"
  66 #include <algorithm>
  67 #include <cassert>
  68 #include <cstdint>
  69 #include <functional>
  70 #include <iterator>
  71 #include <string>
  72 #include <tuple>
  73 #include <utility>
  74
  75 using namespace llvm;
  76
  77 #define DEBUG_TYPE "dagcombine"
     // NOTE(review): DEBUG_TYPE is presumably the tag picked up by the STATISTIC
     // and debug-output macros used in this file - confirm against
     // llvm/ADT/Statistic.h and llvm/Support/Debug.h.
  78
     // Counters declared with STATISTIC; the string is the description attached
     // to each counter.
  79 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  80 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  81 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  82 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  83 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  84 STATISTIC(SlicedLoads, "Number of load sliced");
  85 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  86
     // Hidden boolean flag "combiner-global-alias-analysis". No cl::init is given
     // here, so the initial value is whatever cl::opt<bool> defaults to.
  87 static cl::opt<bool>
  88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  89                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  90
     // Hidden boolean flag "combiner-use-tbaa"; initialised to true.
  91 static cl::opt<bool>
  92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  93         cl::desc("Enable DAG combiner's use of TBAA"));
  94
     // Hidden string flag "combiner-aa-only-func". It only exists in builds
     // where NDEBUG is not defined.
  95 #ifndef NDEBUG
  96 static cl::opt<std::string>
  97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  98                    cl::desc("Only use DAG-combiner alias analysis in this"
  99                             " function"));
 100 #endif
 101
 102 /// Hidden option to stress test load slicing, i.e., when this option
 103 /// is enabled, load slicing bypasses most of its profitability guards.
     /// Initialised to false.
 104 static cl::opt<bool>
 105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 106                   cl::desc("Bypass the profitability model of load slicing"),
 107                   cl::init(false));
 108
     // Hidden boolean flag "combiner-split-load-index"; initialised to true.
 109 static cl::opt<bool>
 110   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 111                     cl::desc("DAG combiner may split indexing from loads"));
 112
     // Hidden boolean flag "combiner-store-merging"; initialised to true.
 113 static cl::opt<bool>
 114     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
 115                        cl::desc("DAG combiner enable merging multiple stores "
 116                                 "into a wider store"));
 117
     // Hidden unsigned flag "combiner-tokenfactor-inline-limit"; initialised to
     // 2048.
 118 static cl::opt<unsigned> TokenFactorInlineLimit(
 119     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 120     cl::desc("Limit the number of operands to inline for Token Factors"));
 121
     // Hidden unsigned flag "combiner-store-merge-dependence-limit"; initialised
     // to 10.
 122 static cl::opt<unsigned> StoreMergeDependenceLimit(
 123     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
 124     cl::desc("Limit the number of times for the same StoreNode and RootNode "
 125              "to bail out in store merging dependence check"));
 126
 127 namespace {
 128
 129   class DAGCombiner {
         // Worklist-driven combiner over a SelectionDAG. Only the worklist
         // helpers, the constructor and a handful of small wrappers are defined
         // inline in this class body; every other member is declared here and
         // defined elsewhere.
         //
         // The DAG being combined and the target lowering info obtained from it
         // in the constructor.
 130     SelectionDAG &DAG;
 131     const TargetLowering &TLI;
         // Current combine level; the constructor starts at BeforeLegalizeTypes.
 132     CombineLevel Level;
         // Optimization level passed to the constructor.
 133     CodeGenOpt::Level OptLevel;
         // Consulted by hasOperation() to choose between "legal" and
         // "legal or custom". Starts out false; where it is updated is not
         // visible in this class body.
 134     bool LegalOperations = false;
         // Consulted by isTypeLegal() and getShiftAmountTy(). Starts out false;
         // where it is updated is not visible in this class body.
 135     bool LegalTypes = false;
         // Set in the constructor from the function's hasOptSize().
 136     bool ForCodeSize;
 137
 138     /// Worklist of all of the nodes that need to be simplified.
 139     ///
 140     /// This must behave as a stack -- new nodes to process are pushed onto the
 141     /// back and when processing we pop off of the back.
 142     ///
 143     /// The worklist will not contain duplicates but may contain null entries
 144     /// due to nodes being deleted from the underlying DAG.
 145     SmallVector<SDNode *, 64> Worklist;
 146
 147     /// Mapping from an SDNode to its position on the worklist.
 148     ///
 149     /// This is used to find and remove nodes from the worklist (by nulling
 150     /// them) when they are deleted from the underlying DAG. It relies on
 151     /// stable indices of nodes within the worklist.
 152     DenseMap<SDNode *, unsigned> WorklistMap;
 153     /// This records all nodes that we attempted to add to the worklist since
 154     /// we considered a new worklist entry. As we do not add duplicate nodes
 155     /// to the worklist, this is different from the tail of the worklist.
 156     SmallSetVector<SDNode *, 32> PruningList;
 157
 158     /// Set of nodes which have been combined (at least once).
 159     ///
 160     /// This is used to allow us to reliably add any operands of a DAG node
 161     /// which have not yet been combined to the worklist.
 162     SmallPtrSet<SDNode *, 32> CombinedNodes;
 163
 164     /// Map from candidate StoreNode to the pair of RootNode and count.
 165     /// The count is used to track how many times we have seen the StoreNode
 166     /// with the same RootNode bail out in dependence check. If we have seen
 167     /// the bail out for the same pair many times over a limit, we won't
 168     /// consider the StoreNode with the same RootNode as store merging
 169     /// candidate again.
 170     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
 171
 172     // AA - Used for DAG load/store alias analysis.
 173     AliasAnalysis *AA;
 174
 175     /// When an instruction is simplified, add all users of the instruction to
 176     /// the work lists because they might get more simplified now.
 177     void AddUsersToWorklist(SDNode *N) {
 178       for (SDNode *Node : N->uses())
 179         AddToWorklist(Node);
 180     }
 181
 182     // Prune potentially dangling nodes. This is called after
 183     // any visit to a node, but should also be called during a visit after any
 184     // failed combine which may have created a DAG node.
 185     void clearAddedDanglingWorklistEntries() {
 186       // Check any nodes added to the worklist to see if they are prunable.
 187       while (!PruningList.empty()) {
 188         auto *N = PruningList.pop_back_val();
 189         if (N->use_empty())
 190           recursivelyDeleteUnusedNodes(N);
 191       }
 192     }
 193
         /// Drain the pruning list, then pop entries off the back of the worklist
         /// until a non-null one is found. The returned node is also erased from
         /// WorklistMap. Returns nullptr once the worklist is exhausted.
 194     SDNode *getNextWorklistEntry() {
 195       // Before we do any work, remove nodes that are not in use.
 196       clearAddedDanglingWorklistEntries();
 197       SDNode *N = nullptr;
 198       // The Worklist holds the SDNodes in order, but it may contain null
 199       // entries.
 200       while (!N && !Worklist.empty()) {
 201         N = Worklist.pop_back_val();
 202       }
 203
 204       if (N) {
 205         bool GoodWorklistEntry = WorklistMap.erase(N);
           // The (void) cast keeps the variable "used" when assert compiles away.
 206         (void)GoodWorklistEntry;
 207         assert(GoodWorklistEntry &&
 208                "Found a worklist entry without a corresponding map entry!");
 209       }
 210       return N;
 211     }
 212
 213     /// Call the node-specific routine that folds each particular type of node.
 214     SDValue visit(SDNode *N);
 215
 216   public:
         /// Bind the combiner to \p D, using \p AA for alias queries and \p OL as
         /// the optimization level. Also caches the function's opt-size attribute
         /// and computes MaximumLegalStoreInBits.
 217     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 218         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 219           OptLevel(OL), AA(AA) {
 220       ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
 221
           // Record the largest bit size among the legal value types, skipping
           // MVT::Other.
 222       MaximumLegalStoreInBits = 0;
 223       for (MVT VT : MVT::all_valuetypes())
 224         if (EVT(VT).isSimple() && VT != MVT::Other &&
 225             TLI.isTypeLegal(EVT(VT)) &&
 226             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 227           MaximumLegalStoreInBits = VT.getSizeInBits();
 228     }
 229
 230     void ConsiderForPruning(SDNode *N) {
 231       // Mark this for potential pruning.
 232       PruningList.insert(N);
 233     }
 234
 235     /// Add to the worklist making sure its instance is at the back (next to be
 236     /// processed.)
 237     void AddToWorklist(SDNode *N) {
 238       assert(N->getOpcode() != ISD::DELETED_NODE &&
 239              "Deleted Node added to Worklist");
 240
 241       // Skip handle nodes as they can't usefully be combined and confuse the
 242       // zero-use deletion strategy.
 243       if (N->getOpcode() == ISD::HANDLENODE)
 244         return;
 245
 246       ConsiderForPruning(N);
 247
           // Only push N when it is not already tracked in WorklistMap. A node
           // that is already on the worklist keeps its existing slot; it is not
           // moved to the back.
 248       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 249         Worklist.push_back(N);
 250     }
 251
 252     /// Remove all instances of N from the worklist. N is also dropped from
         /// CombinedNodes, PruningList and StoreRootCountMap, whether or not it
         /// was on the worklist.
 253     void removeFromWorklist(SDNode *N) {
 254       CombinedNodes.erase(N);
 255       PruningList.remove(N);
 256       StoreRootCountMap.erase(N);
 257
 258       auto It = WorklistMap.find(N);
 259       if (It == WorklistMap.end())
 260         return; // Not in the worklist.
 261
 262       // Null out the entry rather than erasing it to avoid a linear operation.
 263       Worklist[It->second] = nullptr;
 264       WorklistMap.erase(It);
 265     }
 266
 267     void deleteAndRecombine(SDNode *N);
 268     bool recursivelyDeleteUnusedNodes(SDNode *N);
 269
 270     /// Replaces all uses of the results of one DAG node with new values.
 271     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 272                       bool AddTo = true);
 273
 274     /// Replaces all uses of the results of one DAG node with new values.
         /// Single-result convenience wrapper around the array overload.
 275     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 276       return CombineTo(N, &Res, 1, AddTo);
 277     }
 278
 279     /// Replaces all uses of the results of one DAG node with new values.
         /// Two-result convenience wrapper around the array overload.
 280     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 281                       bool AddTo = true) {
 282       SDValue To[] = { Res0, Res1 };
 283       return CombineTo(N, To, 2, AddTo);
 284     }
 285
 286     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 287
 288   private:
         // Largest bit size among the legal value types; computed once in the
         // constructor.
 289     unsigned MaximumLegalStoreInBits;
 290
 291     /// Check the specified integer node value to see if it can be simplified or
 292     /// if things it uses can be simplified by bit propagation.
 293     /// If so, return true.
 294     bool SimplifyDemandedBits(SDValue Op) {
           // Demand every bit of the scalar value size.
 295       unsigned BitWidth = Op.getScalarValueSizeInBits();
 296       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
 297       return SimplifyDemandedBits(Op, DemandedBits);
 298     }
 299
         /// As above, but with an explicit set of demanded bits. Every vector
         /// element is demanded; a non-vector value is treated as one element.
 300     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 301       EVT VT = Op.getValueType();
 302       unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
 303       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 304       return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
 305     }
 306
 307     /// Check the specified vector node value to see if it can be simplified or
 308     /// if things it uses can be simplified as it only uses some of the
 309     /// elements. If so, return true.
 310     bool SimplifyDemandedVectorElts(SDValue Op) {
           // Start from "all elements demanded".
 311       unsigned NumElts = Op.getValueType().getVectorNumElements();
 312       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 313       return SimplifyDemandedVectorElts(Op, DemandedElts);
 314     }
 315
 316     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 317                               const APInt &DemandedElts);
 318     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 319                                     bool AssumeSingleUse = false);
 320
 321     bool CombineToPreIndexedLoadStore(SDNode *N);
 322     bool CombineToPostIndexedLoadStore(SDNode *N);
 323     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 324     bool SliceUpLoad(SDNode *N);
 325
 326     // Scalars have size 0 to distinguish from singleton vectors.
         // NOTE(review): the remark above does not obviously describe any of the
         // declarations that follow; it may be stale - confirm before relying
         // on it.
 327     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 328     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 329     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 330
 331     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 332     ///   load.
 333     ///
 334     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 335     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 336     /// \param EltNo index of the vector element to load.
 337     /// \param OriginalLoad load that EVE came from to be replaced.
 338     /// \returns EVE on success SDValue() on failure.
 339     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 340                                          SDValue EltNo,
 341                                          LoadSDNode *OriginalLoad);
 342     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 343     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 344     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 345     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 346     SDValue PromoteIntBinOp(SDValue Op);
 347     SDValue PromoteIntShiftOp(SDValue Op);
 348     SDValue PromoteExtend(SDValue Op);
 349     bool PromoteLoad(SDValue Op);
 350
 351     /// Call the node-specific routine that knows how to fold each
 352     /// particular type of node. If that doesn't do anything, try the
 353     /// target-specific DAG combines.
 354     SDValue combine(SDNode *N);
 355
 356     // Visitation implementation - Implement dag node combining for different
 357     // node types.  The semantics are as follows:
 358     // Return Value:
 359     //   SDValue.getNode() == 0 - No change was made
 360     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 361     //   otherwise              - N should be replaced by the returned Operand.
 362     //
 363     SDValue visitTokenFactor(SDNode *N);
 364     SDValue visitMERGE_VALUES(SDNode *N);
 365     SDValue visitADD(SDNode *N);
 366     SDValue visitADDLike(SDNode *N);
 367     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 368     SDValue visitSUB(SDNode *N);
 369     SDValue visitADDSAT(SDNode *N);
 370     SDValue visitSUBSAT(SDNode *N);
 371     SDValue visitADDC(SDNode *N);
 372     SDValue visitADDO(SDNode *N);
 373     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 374     SDValue visitSUBC(SDNode *N);
 375     SDValue visitSUBO(SDNode *N);
 376     SDValue visitADDE(SDNode *N);
 377     SDValue visitADDCARRY(SDNode *N);
 378     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 379     SDValue visitSUBE(SDNode *N);
 380     SDValue visitSUBCARRY(SDNode *N);
 381     SDValue visitMUL(SDNode *N);
 382     SDValue visitMULFIX(SDNode *N);
 383     SDValue useDivRem(SDNode *N);
 384     SDValue visitSDIV(SDNode *N);
 385     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 386     SDValue visitUDIV(SDNode *N);
 387     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 388     SDValue visitREM(SDNode *N);
 389     SDValue visitMULHU(SDNode *N);
 390     SDValue visitMULHS(SDNode *N);
 391     SDValue visitSMUL_LOHI(SDNode *N);
 392     SDValue visitUMUL_LOHI(SDNode *N);
 393     SDValue visitMULO(SDNode *N);
 394     SDValue visitIMINMAX(SDNode *N);
 395     SDValue visitAND(SDNode *N);
 396     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 397     SDValue visitOR(SDNode *N);
 398     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 399     SDValue visitXOR(SDNode *N);
 400     SDValue SimplifyVBinOp(SDNode *N);
 401     SDValue visitSHL(SDNode *N);
 402     SDValue visitSRA(SDNode *N);
 403     SDValue visitSRL(SDNode *N);
 404     SDValue visitFunnelShift(SDNode *N);
 405     SDValue visitRotate(SDNode *N);
 406     SDValue visitABS(SDNode *N);
 407     SDValue visitBSWAP(SDNode *N);
 408     SDValue visitBITREVERSE(SDNode *N);
 409     SDValue visitCTLZ(SDNode *N);
 410     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 411     SDValue visitCTTZ(SDNode *N);
 412     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 413     SDValue visitCTPOP(SDNode *N);
 414     SDValue visitSELECT(SDNode *N);
 415     SDValue visitVSELECT(SDNode *N);
 416     SDValue visitSELECT_CC(SDNode *N);
 417     SDValue visitSETCC(SDNode *N);
 418     SDValue visitSETCCCARRY(SDNode *N);
 419     SDValue visitSIGN_EXTEND(SDNode *N);
 420     SDValue visitZERO_EXTEND(SDNode *N);
 421     SDValue visitANY_EXTEND(SDNode *N);
 422     SDValue visitAssertExt(SDNode *N);
 423     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 424     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 425     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 426     SDValue visitTRUNCATE(SDNode *N);
 427     SDValue visitBITCAST(SDNode *N);
 428     SDValue visitBUILD_PAIR(SDNode *N);
 429     SDValue visitFADD(SDNode *N);
 430     SDValue visitFSUB(SDNode *N);
 431     SDValue visitFMUL(SDNode *N);
 432     SDValue visitFMA(SDNode *N);
 433     SDValue visitFDIV(SDNode *N);
 434     SDValue visitFREM(SDNode *N);
 435     SDValue visitFSQRT(SDNode *N);
 436     SDValue visitFCOPYSIGN(SDNode *N);
 437     SDValue visitFPOW(SDNode *N);
 438     SDValue visitSINT_TO_FP(SDNode *N);
 439     SDValue visitUINT_TO_FP(SDNode *N);
 440     SDValue visitFP_TO_SINT(SDNode *N);
 441     SDValue visitFP_TO_UINT(SDNode *N);
 442     SDValue visitFP_ROUND(SDNode *N);
 443     SDValue visitFP_EXTEND(SDNode *N);
 444     SDValue visitFNEG(SDNode *N);
 445     SDValue visitFABS(SDNode *N);
 446     SDValue visitFCEIL(SDNode *N);
 447     SDValue visitFTRUNC(SDNode *N);
 448     SDValue visitFFLOOR(SDNode *N);
 449     SDValue visitFMINNUM(SDNode *N);
 450     SDValue visitFMAXNUM(SDNode *N);
 451     SDValue visitFMINIMUM(SDNode *N);
 452     SDValue visitFMAXIMUM(SDNode *N);
 453     SDValue visitBRCOND(SDNode *N);
 454     SDValue visitBR_CC(SDNode *N);
 455     SDValue visitLOAD(SDNode *N);
 456
 457     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 458     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 459
 460     SDValue visitSTORE(SDNode *N);
 461     SDValue visitLIFETIME_END(SDNode *N);
 462     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 463     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 464     SDValue visitBUILD_VECTOR(SDNode *N);
 465     SDValue visitCONCAT_VECTORS(SDNode *N);
 466     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 467     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 468     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 469     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 470     SDValue visitMLOAD(SDNode *N);
 471     SDValue visitMSTORE(SDNode *N);
 472     SDValue visitMGATHER(SDNode *N);
 473     SDValue visitMSCATTER(SDNode *N);
 474     SDValue visitFP_TO_FP16(SDNode *N);
 475     SDValue visitFP16_TO_FP(SDNode *N);
 476     SDValue visitVECREDUCE(SDNode *N);
 477
 478     SDValue visitFADDForFMACombine(SDNode *N);
 479     SDValue visitFSUBForFMACombine(SDNode *N);
 480     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 481
 482     SDValue XformToShuffleWithZero(SDNode *N);
 483     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 484                                                     const SDLoc &DL, SDValue N0,
 485                                                     SDValue N1);
 486     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 487                                       SDValue N1);
 488     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 489                            SDValue N1, SDNodeFlags Flags);
 490
 491     SDValue visitShiftByConstant(SDNode *N);
 492
 493     SDValue foldSelectOfConstants(SDNode *N);
 494     SDValue foldVSelectOfConstants(SDNode *N);
 495     SDValue foldBinOpIntoSelect(SDNode *BO);
 496     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 497     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 498     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 499     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 500                              SDValue N2, SDValue N3, ISD::CondCode CC,
 501                              bool NotExtCompare = false);
 502     SDValue convertSelectOfFPConstantsToLoadOffset(
 503         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 504         ISD::CondCode CC);
 505     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 506                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 507     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 508                               const SDLoc &DL);
 509     SDValue unfoldMaskedMerge(SDNode *N);
 510     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 511     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 512                           const SDLoc &DL, bool foldBooleans);
 513     SDValue rebuildSetCC(SDValue N);
 514
 515     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 516                            SDValue &CC) const;
 517     bool isOneUseSetCC(SDValue N) const;
 518     bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 519
 520     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 521                                          unsigned HiOp);
 522     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 523     SDValue CombineExtLoad(SDNode *N);
 524     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 525     SDValue combineRepeatedFPDivisors(SDNode *N);
 526     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 527     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 528     SDValue BuildSDIV(SDNode *N);
 529     SDValue BuildSDIVPow2(SDNode *N);
 530     SDValue BuildUDIV(SDNode *N);
 531     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 532     SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
 533     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 534     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 535     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 536     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 537                                 SDNodeFlags Flags, bool Reciprocal);
 538     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 539                                 SDNodeFlags Flags, bool Reciprocal);
 540     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 541                                bool DemandHighBits = true);
 542     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 543     SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 544                               SDValue InnerPos, SDValue InnerNeg,
 545                               unsigned PosOpcode, unsigned NegOpcode,
 546                               const SDLoc &DL);
 547     SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 548     SDValue MatchLoadCombine(SDNode *N);
 549     SDValue MatchStoreCombine(StoreSDNode *N);
 550     SDValue ReduceLoadWidth(SDNode *N);
 551     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 552     SDValue splitMergedValStore(StoreSDNode *ST);
 553     SDValue TransformFPLoadStorePair(SDNode *N);
 554     SDValue convertBuildVecZextToZext(SDNode *N);
 555     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 556     SDValue reduceBuildVecToShuffle(SDNode *N);
 557     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 558                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 559                                   SDValue VecIn2, unsigned LeftIdx,
 560                                   bool DidSplitVec);
 561     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 562
 563     /// Walk up chain skipping non-aliasing memory nodes,
 564     /// looking for aliasing nodes and adding them to the Aliases vector.
 565     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 566                           SmallVectorImpl<SDValue> &Aliases);
 567
 568     /// Return true if there is any possibility that the two addresses overlap.
 569     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 570
 571     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 572     /// chain (aliasing node.)
 573     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 574
 575     /// Try to replace a store and any possibly adjacent stores on
 576     /// consecutive chains with better chains. Return true only if St is
 577     /// replaced.
 578     ///
 579     /// Notice that other chains may still be replaced even if the function
 580     /// returns false.
 581     bool findBetterNeighborChains(StoreSDNode *St);
 582
 583     // Helper for findBetterNeighborChains. Walk up store chain add additional
 584     // chained stores that do not overlap and can be parallelized.
 585     bool parallelizeChainedStores(StoreSDNode *St);
 586
 587     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 588     /// is located in a sequence of memory operations connected by a chain.
 589     struct MemOpLink {
 590       // Ptr to the mem node.
 591       LSBaseSDNode *MemNode;
 592
 593       // Offset from the base ptr.
 594       int64_t OffsetFromBase;
 595
 596       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 597           : MemNode(N), OffsetFromBase(Offset) {}
 598     };
 599
 600     /// This is a helper function for visitMUL to check the profitability
 601     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 602     /// MulNode is the original multiply, AddNode is (add x, c1),
 603     /// and ConstNode is c2.
 604     bool isMulAddWithConstProfitable(SDNode *MulNode,
 605                                      SDValue &AddNode,
 606                                      SDValue &ConstNode);
 607
 608     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 609     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 610     /// the type of the loaded value to be extended.
 611     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 612                           EVT LoadResultTy, EVT &ExtVT);
 613
 614     /// Helper function to calculate whether the given Load/Store can have its
 615     /// width reduced to ExtVT.
 616     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 617                            EVT &MemVT, unsigned ShAmt = 0);
 618
 619     /// Used by BackwardsPropagateMask to find suitable loads.
 620     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 621                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 622                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 623     /// Attempt to propagate a given AND node back to load leaves so that they
 624     /// can be combined into narrow loads.
 625     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 626
 627     /// Helper function for MergeConsecutiveStores which merges the
 628     /// component store chains.
 629     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 630                                 unsigned NumStores);
 631
 632     /// This is a helper function for MergeConsecutiveStores. When the
 633     /// source elements of the consecutive stores are all constants or
 634     /// all extracted vector elements, try to merge them into one
 635     /// larger store introducing bitcasts if necessary.  \return True
 636     /// if a merged store was created.
 637     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 638                                          EVT MemVT, unsigned NumStores,
 639                                          bool IsConstantSrc, bool UseVector,
 640                                          bool UseTrunc);
 641
 642     /// This is a helper function for MergeConsecutiveStores. Stores
 643     /// that potentially may be merged with St are placed in
 644     /// StoreNodes. RootNode is a chain predecessor to all store
 645     /// candidates.
 646     void getStoreMergeCandidates(StoreSDNode *St,
 647                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 648                                  SDNode *&Root);
 649
 650     /// Helper function for MergeConsecutiveStores. Checks if
 651     /// candidate stores have indirect dependency through their
 652     /// operands. RootNode is the predecessor to all stores calculated
 653     /// by getStoreMergeCandidates and is used to prune the dependency check.
 654     /// \return True if safe to merge.
 655     bool checkMergeStoreCandidatesForDependencies(
 656         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 657         SDNode *RootNode);
 658
 659     /// Merge consecutive store operations into a wide store.
 660     /// This optimization uses wide integers or vectors when possible.
 661     /// \return a bool - presumably true when at least one merged store was
 662     /// created; TODO confirm against the out-of-line definition.
 663     bool MergeConsecutiveStores(StoreSDNode *St);
 664
 665     /// Try to transform a truncation where C is a constant:
 666     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
 667     ///
 668     /// \p N needs to be a truncation and its first operand an AND. Other
 669     /// requirements are checked by the function (e.g. that trunc is
 670     /// single-use) and if missed an empty SDValue is returned.
 671     SDValue distributeTruncateThroughAnd(SDNode *N);
 672
 673     /// Helper function to determine whether the target supports operation
 674     /// given by \p Opcode for type \p VT, that is, whether the operation
 675     /// is legal or custom before legalizing operations, and whether is
 676     /// legal (but not custom) after legalization.
 677     bool hasOperation(unsigned Opcode, EVT VT) {
 678       if (LegalOperations)
 679         return TLI.isOperationLegal(Opcode, VT);
 680       return TLI.isOperationLegalOrCustom(Opcode, VT);
 681     }
 682
 683   public:
 684     /// Runs the dag combiner on all nodes in the work list
 685     void Run(CombineLevel AtLevel);
 686
 687     SelectionDAG &getDAG() const { return DAG; }
 688
 689     /// Returns a type large enough to hold any valid shift amount - before type
 690     /// legalization these can be huge.
 691     EVT getShiftAmountTy(EVT LHSTy) {
 692       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 693       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 694     }
 695
 696     /// This method returns true if we are running before type legalization or
 697     /// if the specified VT is legal.
 698     bool isTypeLegal(const EVT &VT) {
 699       if (!LegalTypes) return true;
 700       return TLI.isTypeLegal(VT);
 701     }
 702
 703     /// Convenience wrapper around TargetLowering::getSetCCResultType
 704     EVT getSetCCResultType(EVT VT) const {
 705       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 706     }
 707
 708     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
 709                          SDValue OrigLoad, SDValue ExtLoad,
 710                          ISD::NodeType ExtType);
 711   };
 712
 713 /// This class is a DAGUpdateListener that removes any deleted
 714 /// nodes from the worklist.
 715 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 716   DAGCombiner &DC;
 717
 718 public:
 719   explicit WorklistRemover(DAGCombiner &dc)
 720     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 721
 722   void NodeDeleted(SDNode *N, SDNode *E) override {
 723     DC.removeFromWorklist(N);
 724   }
 725 };
 726
 727 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 728   DAGCombiner &DC;
 729
 730 public:
 731   explicit WorklistInserter(DAGCombiner &dc)
 732       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 733
 734   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 735   //        compile time costs in large DAGs, e.g. Halide.
 736   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 737 };
 738
 739 } // end anonymous namespace
 740
 741 //===----------------------------------------------------------------------===//
 742 //  TargetLowering::DAGCombinerInfo implementation
 743 //===----------------------------------------------------------------------===//
 744
 745 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 746   ((DAGCombiner*)DC)->AddToWorklist(N);
 747 }
 748
 749 SDValue TargetLowering::DAGCombinerInfo::
 750 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 751   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 752 }
 753
 754 SDValue TargetLowering::DAGCombinerInfo::
 755 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 756   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 757 }
 758
 759 SDValue TargetLowering::DAGCombinerInfo::
 760 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 761   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 762 }
 763
 764 bool TargetLowering::DAGCombinerInfo::
 765 recursivelyDeleteUnusedNodes(SDNode *N) {
 766   return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
 767 }
 768
 769 void TargetLowering::DAGCombinerInfo::
 770 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 771   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 772 }
 773
 774 //===----------------------------------------------------------------------===//
 775 // Helper Functions
 776 //===----------------------------------------------------------------------===//
 777
 778 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 779   removeFromWorklist(N);
 780
 781   // If the operands of this node are only used by the node, they will now be
 782   // dead. Make sure to re-visit them and recursively delete dead nodes.
 783   for (const SDValue &Op : N->ops())
 784     // For an operand generating multiple values, one of the values may
 785     // become dead allowing further simplification (e.g. split index
 786     // arithmetic from an indexed load).
 787     if (Op->hasOneUse() || Op->getNumValues() > 1)
 788       AddToWorklist(Op.getNode());
 789
 790   DAG.DeleteNode(N);
 791 }
 792
 793 // APInts must be the same size for most operations, this helper
 794 // function zero extends the shorter of the pair so that they match.
 795 // We provide an Offset so that we can create bitwidths that won't overflow.
 796 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
 797   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
 798   LHS = LHS.zextOrSelf(Bits);
 799   RHS = RHS.zextOrSelf(Bits);
 800 }
 801
 802 // Return true if this node is a setcc, or is a select_cc
 803 // that selects between the target values used for true and false, making it
 804 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
 805 // the appropriate nodes based on the type of node we are checking. This
 806 // simplifies life a bit for the callers.
 807 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 808                                     SDValue &CC) const {
 809   if (N.getOpcode() == ISD::SETCC) {
 810     LHS = N.getOperand(0);
 811     RHS = N.getOperand(1);
 812     CC  = N.getOperand(2);
 813     return true;
 814   }
 815
 816   if (N.getOpcode() != ISD::SELECT_CC ||
 817       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
 818       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
 819     return false;
 820
 821   if (TLI.getBooleanContents(N.getValueType()) ==
 822       TargetLowering::UndefinedBooleanContent)
 823     return false;
 824
 825   LHS = N.getOperand(0);
 826   RHS = N.getOperand(1);
 827   CC  = N.getOperand(4);
 828   return true;
 829 }
 830
 831 /// Return true if this is a SetCC-equivalent operation with only one use.
 832 /// If this is true, it allows the users to invert the operation for free when
 833 /// it is profitable to do so.
 834 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
 835   SDValue N0, N1, N2;
 836   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
 837     return true;
 838   return false;
 839 }
 840
 841 // Returns the SDNode if it is a constant float BuildVector
 842 // or constant float.
 843 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
 844   if (isa<ConstantFPSDNode>(N))
 845     return N.getNode();
 846   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
 847     return N.getNode();
 848   return nullptr;
 849 }
 850
 851 // Determines if it is a constant integer or a build vector of constant
 852 // integers (and undefs).
 853 // Do not permit build vector implicit truncation.
 854 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
 855   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
 856     return !(Const->isOpaque() && NoOpaques);
 857   if (N.getOpcode() != ISD::BUILD_VECTOR)
 858     return false;
 859   unsigned BitWidth = N.getScalarValueSizeInBits();
 860   for (const SDValue &Op : N->op_values()) {
 861     if (Op.isUndef())
 862       continue;
 863     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
 864     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
 865         (Const->isOpaque() && NoOpaques))
 866       return false;
 867   }
 868   return true;
 869 }
 870
 871 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
 872 // undef's.
 873 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
 874   if (V.getOpcode() != ISD::BUILD_VECTOR)
 875     return false;
 876   return isConstantOrConstantVector(V, NoOpaques) ||
 877          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
 878 }
 879
 880 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
 881                                                              const SDLoc &DL,
 882                                                              SDValue N0,
 883                                                              SDValue N1) {
 884   // Currently this only tries to ensure we don't undo the GEP splits done by
 885   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
 886   // we check if the following transformation would be problematic:
 887   // (load/store (add, (add, x, offset1), offset2)) ->
 888   // (load/store (add, x, offset1+offset2)).
 889
 890   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
 891     return false;
 892
 893   if (N0.hasOneUse())
 894     return false;
 895
 896   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
 897   auto *C2 = dyn_cast<ConstantSDNode>(N1);
 898   if (!C1 || !C2)
 899     return false;
 900
 901   const APInt &C1APIntVal = C1->getAPIntValue();
 902   const APInt &C2APIntVal = C2->getAPIntValue();
 903   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
 904     return false;
 905
 906   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
 907   if (CombinedValueIntVal.getBitWidth() > 64)
 908     return false;
 909   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
 910
 911   for (SDNode *Node : N0->uses()) {
 912     auto LoadStore = dyn_cast<MemSDNode>(Node);
 913     if (LoadStore) {
 914       // Is x[offset2] already not a legal addressing mode? If so then
 915       // reassociating the constants breaks nothing (we test offset2 because
 916       // that's the one we hope to fold into the load or store).
 917       TargetLoweringBase::AddrMode AM;
 918       AM.HasBaseReg = true;
 919       AM.BaseOffs = C2APIntVal.getSExtValue();
 920       EVT VT = LoadStore->getMemoryVT();
 921       unsigned AS = LoadStore->getAddressSpace();
 922       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
 923       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 924         continue;
 925
 926       // Would x[offset1+offset2] still be a legal addressing mode?
 927       AM.BaseOffs = CombinedValue;
 928       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
 929         return true;
 930     }
 931   }
 932
 933   return false;
 934 }
 935
 936 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
 937 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
 938 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
 939                                                SDValue N0, SDValue N1) {
 940   EVT VT = N0.getValueType();
 941
 942   if (N0.getOpcode() != Opc)
 943     return SDValue();
 944
 945   // Don't reassociate reductions.
 946   if (N0->getFlags().hasVectorReduction())
 947     return SDValue();
 948
 949   if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
 950     if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
 951       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
 952       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
 953         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
 954       return SDValue();
 955     }
 956     if (N0.hasOneUse()) {
 957       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
 958       //              iff (op x, c1) has one use
 959       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
 960       if (!OpNode.getNode())
 961         return SDValue();
 962       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
 963     }
 964   }
 965   return SDValue();
 966 }
 967
 968 // Try to reassociate commutative binops.
 969 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 970                                     SDValue N1, SDNodeFlags Flags) {
 971   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
 972   // Don't reassociate reductions.
 973   if (Flags.hasVectorReduction())
 974     return SDValue();
 975
 976   // Floating-point reassociation is not allowed without loose FP math.
 977   if (N0.getValueType().isFloatingPoint() ||
 978       N1.getValueType().isFloatingPoint())
 979     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
 980       return SDValue();
 981
 982   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
 983     return Combined;
 984   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
 985     return Combined;
 986   return SDValue();
 987 }
 988
 989 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 990                                bool AddTo) {
 991   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
 992   ++NodesCombined;
 993   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
 994              To[0].getNode()->dump(&DAG);
 995              dbgs() << " and " << NumTo - 1 << " other values\n");
 996   for (unsigned i = 0, e = NumTo; i != e; ++i)
 997     assert((!To[i].getNode() ||
 998             N->getValueType(i) == To[i].getValueType()) &&
 999            "Cannot combine value to value of different type!");
1000
1001   WorklistRemover DeadNodes(*this);
1002   DAG.ReplaceAllUsesWith(N, To);
1003   if (AddTo) {
1004     // Push the new nodes and any users onto the worklist
1005     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1006       if (To[i].getNode()) {
1007         AddToWorklist(To[i].getNode());
1008         AddUsersToWorklist(To[i].getNode());
1009       }
1010     }
1011   }
1012
1013   // Finally, if the node is now dead, remove it from the graph.  The node
1014   // may not be dead if the replacement process recursively simplified to
1015   // something else needing this node.
1016   if (N->use_empty())
1017     deleteAndRecombine(N);
1018   return SDValue(N, 0);
1019 }
1020
1021 void DAGCombiner::
1022 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1023   // Replace all uses.  If any nodes become isomorphic to other nodes and
1024   // are deleted, make sure to remove them from our worklist.
1025   WorklistRemover DeadNodes(*this);
1026   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1027
1028   // Push the new node and any (possibly new) users onto the worklist.
1029   AddToWorklist(TLO.New.getNode());
1030   AddUsersToWorklist(TLO.New.getNode());
1031
1032   // Finally, if the node is now dead, remove it from the graph.  The node
1033   // may not be dead if the replacement process recursively simplified to
1034   // something else needing this node.
1035   if (TLO.Old.getNode()->use_empty())
1036     deleteAndRecombine(TLO.Old.getNode());
1037 }
1038
1039 /// Check the specified integer node value to see if it can be simplified or if
1040 /// things it uses can be simplified by bit propagation. If so, return true.
1041 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1042                                        const APInt &DemandedElts) {
1043   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1044   KnownBits Known;
1045   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1046     return false;
1047
1048   // Revisit the node.
1049   AddToWorklist(Op.getNode());
1050
1051   // Replace the old value with the new one.
1052   ++NodesCombined;
1053   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1054              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1055              dbgs() << '\n');
1056
1057   CommitTargetLoweringOpt(TLO);
1058   return true;
1059 }
1060
1061 /// Check the specified vector node value to see if it can be simplified or
1062 /// if things it uses can be simplified as it only uses some of the elements.
1063 /// If so, return true.
1064 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1065                                              const APInt &DemandedElts,
1066                                              bool AssumeSingleUse) {
1067   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1068   APInt KnownUndef, KnownZero;
1069   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1070                                       TLO, 0, AssumeSingleUse))
1071     return false;
1072
1073   // Revisit the node.
1074   AddToWorklist(Op.getNode());
1075
1076   // Replace the old value with the new one.
1077   ++NodesCombined;
1078   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1079              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1080              dbgs() << '\n');
1081
1082   CommitTargetLoweringOpt(TLO);
1083   return true;
1084 }
1085
1086 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1087   SDLoc DL(Load);
1088   EVT VT = Load->getValueType(0);
1089   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1090
1091   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1092              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1093   WorklistRemover DeadNodes(*this);
1094   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1095   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1096   deleteAndRecombine(Load);
1097   AddToWorklist(Trunc.getNode());
1098 }
1099
1100 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1101   Replace = false;
1102   SDLoc DL(Op);
1103   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1104     LoadSDNode *LD = cast<LoadSDNode>(Op);
1105     EVT MemVT = LD->getMemoryVT();
1106     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1107                                                       : LD->getExtensionType();
1108     Replace = true;
1109     return DAG.getExtLoad(ExtType, DL, PVT,
1110                           LD->getChain(), LD->getBasePtr(),
1111                           MemVT, LD->getMemOperand());
1112   }
1113
1114   unsigned Opc = Op.getOpcode();
1115   switch (Opc) {
1116   default: break;
1117   case ISD::AssertSext:
1118     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1119       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1120     break;
1121   case ISD::AssertZext:
1122     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1123       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1124     break;
1125   case ISD::Constant: {
1126     unsigned ExtOpc =
1127       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1128     return DAG.getNode(ExtOpc, DL, PVT, Op);
1129   }
1130   }
1131
1132   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1133     return SDValue();
1134   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1135 }
1136
1137 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1138   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1139     return SDValue();
1140   EVT OldVT = Op.getValueType();
1141   SDLoc DL(Op);
1142   bool Replace = false;
1143   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1144   if (!NewOp.getNode())
1145     return SDValue();
1146   AddToWorklist(NewOp.getNode());
1147
1148   if (Replace)
1149     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1150   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1151                      DAG.getValueType(OldVT));
1152 }
1153
1154 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1155   EVT OldVT = Op.getValueType();
1156   SDLoc DL(Op);
1157   bool Replace = false;
1158   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1159   if (!NewOp.getNode())
1160     return SDValue();
1161   AddToWorklist(NewOp.getNode());
1162
1163   if (Replace)
1164     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1165   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1166 }
1167
1168 /// Promote the specified integer binary operation if the target indicates it is
1169 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1170 /// i32 since i16 instructions are longer.
1171 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1172   if (!LegalOperations)
1173     return SDValue();
1174
1175   EVT VT = Op.getValueType();
1176   if (VT.isVector() || !VT.isInteger())
1177     return SDValue();
1178
1179   // If operation type is 'undesirable', e.g. i16 on x86, consider
1180   // promoting it.
1181   unsigned Opc = Op.getOpcode();
1182   if (TLI.isTypeDesirableForOp(Opc, VT))
1183     return SDValue();
1184
1185   EVT PVT = VT;
1186   // Consult target whether it is a good idea to promote this operation and
1187   // what's the right type to promote it to.
1188   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1189     assert(PVT != VT && "Don't know what type to promote to!");
1190
1191     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1192
1193     bool Replace0 = false;
1194     SDValue N0 = Op.getOperand(0);
1195     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1196
1197     bool Replace1 = false;
1198     SDValue N1 = Op.getOperand(1);
1199     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1200     SDLoc DL(Op);
1201
1202     SDValue RV =
1203         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1204
1205     // We are always replacing N0/N1's use in N and only need
1206     // additional replacements if there are additional uses.
1207     Replace0 &= !N0->hasOneUse();
1208     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1209
1210     // Combine Op here so it is preserved past replacements.
1211     CombineTo(Op.getNode(), RV);
1212
1213     // If operands have a use ordering, make sure we deal with
1214     // predecessor first.
1215     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1216       std::swap(N0, N1);
1217       std::swap(NN0, NN1);
1218     }
1219
1220     if (Replace0) {
1221       AddToWorklist(NN0.getNode());
1222       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1223     }
1224     if (Replace1) {
1225       AddToWorklist(NN1.getNode());
1226       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1227     }
1228     return Op;
1229   }
1230   return SDValue();
1231 }
1232
1233 /// Promote the specified integer shift operation if the target indicates it is
1234 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1235 /// i32 since i16 instructions are longer.
1236 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1237   if (!LegalOperations)
1238     return SDValue();
1239
1240   EVT VT = Op.getValueType();
1241   if (VT.isVector() || !VT.isInteger())
1242     return SDValue();
1243
1244   // If operation type is 'undesirable', e.g. i16 on x86, consider
1245   // promoting it.
1246   unsigned Opc = Op.getOpcode();
1247   if (TLI.isTypeDesirableForOp(Opc, VT))
1248     return SDValue();
1249
1250   EVT PVT = VT;
1251   // Consult target whether it is a good idea to promote this operation and
1252   // what's the right type to promote it to.
1253   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1254     assert(PVT != VT && "Don't know what type to promote to!");
1255
1256     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1257
1258     bool Replace = false;
1259     SDValue N0 = Op.getOperand(0);
1260     SDValue N1 = Op.getOperand(1);
1261     if (Opc == ISD::SRA)
1262       N0 = SExtPromoteOperand(N0, PVT);
1263     else if (Opc == ISD::SRL)
1264       N0 = ZExtPromoteOperand(N0, PVT);
1265     else
1266       N0 = PromoteOperand(N0, PVT, Replace);
1267
1268     if (!N0.getNode())
1269       return SDValue();
1270
1271     SDLoc DL(Op);
1272     SDValue RV =
1273         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1274
1275     if (Replace)
1276       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1277
1278     // Deal with Op being deleted.
1279     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1280       return RV;
1281   }
1282   return SDValue();
1283 }
1284
1285 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1286   if (!LegalOperations)
1287     return SDValue();
1288
1289   EVT VT = Op.getValueType();
1290   if (VT.isVector() || !VT.isInteger())
1291     return SDValue();
1292
1293   // If operation type is 'undesirable', e.g. i16 on x86, consider
1294   // promoting it.
1295   unsigned Opc = Op.getOpcode();
1296   if (TLI.isTypeDesirableForOp(Opc, VT))
1297     return SDValue();
1298
1299   EVT PVT = VT;
1300   // Consult target whether it is a good idea to promote this operation and
1301   // what's the right type to promote it to.
1302   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1303     assert(PVT != VT && "Don't know what type to promote to!");
1304     // fold (aext (aext x)) -> (aext x)
1305     // fold (aext (zext x)) -> (zext x)
1306     // fold (aext (sext x)) -> (sext x)
1307     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1308     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1309   }
1310   return SDValue();
1311 }
1312
1313 bool DAGCombiner::PromoteLoad(SDValue Op) {
1314   if (!LegalOperations)
1315     return false;
1316
1317   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1318     return false;
1319
1320   EVT VT = Op.getValueType();
1321   if (VT.isVector() || !VT.isInteger())
1322     return false;
1323
1324   // If operation type is 'undesirable', e.g. i16 on x86, consider
1325   // promoting it.
1326   unsigned Opc = Op.getOpcode();
1327   if (TLI.isTypeDesirableForOp(Opc, VT))
1328     return false;
1329
1330   EVT PVT = VT;
1331   // Consult target whether it is a good idea to promote this operation and
1332   // what's the right type to promote it to.
1333   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1334     assert(PVT != VT && "Don't know what type to promote to!");
1335
1336     SDLoc DL(Op);
1337     SDNode *N = Op.getNode();
1338     LoadSDNode *LD = cast<LoadSDNode>(N);
1339     EVT MemVT = LD->getMemoryVT();
1340     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1341                                                       : LD->getExtensionType();
1342     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1343                                    LD->getChain(), LD->getBasePtr(),
1344                                    MemVT, LD->getMemOperand());
1345     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1346
1347     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1348                Result.getNode()->dump(&DAG); dbgs() << '\n');
1349     WorklistRemover DeadNodes(*this);
1350     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1351     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1352     deleteAndRecombine(N);
1353     AddToWorklist(Result.getNode());
1354     return true;
1355   }
1356   return false;
1357 }
1358
1359 /// Recursively delete a node which has no uses and any operands for
1360 /// which it is the only use.
1361 ///
1362 /// Note that this both deletes the nodes and removes them from the worklist.
1363 /// It also adds any nodes who have had a user deleted to the worklist as they
1364 /// may now have only one use and subject to other combines.
1365 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1366   if (!N->use_empty())
1367     return false;
1368
1369   SmallSetVector<SDNode *, 16> Nodes;
1370   Nodes.insert(N);
1371   do {
1372     N = Nodes.pop_back_val();
1373     if (!N)
1374       continue;
1375
1376     if (N->use_empty()) {
1377       for (const SDValue &ChildN : N->op_values())
1378         Nodes.insert(ChildN.getNode());
1379
1380       removeFromWorklist(N);
1381       DAG.DeleteNode(N);
1382     } else {
1383       AddToWorklist(N);
1384     }
1385   } while (!Nodes.empty());
1386   return true;
1387 }
1388
1389 //===----------------------------------------------------------------------===//
1390 //  Main DAG Combiner implementation
1391 //===----------------------------------------------------------------------===//
1392
1393 void DAGCombiner::Run(CombineLevel AtLevel) {
1394   // set the instance variables, so that the various visit routines may use it.
1395   Level = AtLevel;
1396   LegalOperations = Level >= AfterLegalizeVectorOps;
1397   LegalTypes = Level >= AfterLegalizeTypes;
1398
1399   WorklistInserter AddNodes(*this);
1400
1401   // Add all the dag nodes to the worklist.
1402   for (SDNode &Node : DAG.allnodes())
1403     AddToWorklist(&Node);
1404
1405   // Create a dummy node (which is not added to allnodes), that adds a reference
1406   // to the root node, preventing it from being deleted, and tracking any
1407   // changes of the root.
1408   HandleSDNode Dummy(DAG.getRoot());
1409
1410   // While we have a valid worklist entry node, try to combine it.
1411   while (SDNode *N = getNextWorklistEntry()) {
1412     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1413     // N is deleted from the DAG, since they too may now be dead or may have a
1414     // reduced number of uses, allowing other xforms.
1415     if (recursivelyDeleteUnusedNodes(N))
1416       continue;
1417
1418     WorklistRemover DeadNodes(*this);
1419
1420     // If this combine is running after legalizing the DAG, re-legalize any
1421     // nodes pulled off the worklist.
1422     if (Level == AfterLegalizeDAG) {
1423       SmallSetVector<SDNode *, 16> UpdatedNodes;
1424       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1425
1426       for (SDNode *LN : UpdatedNodes) {
1427         AddUsersToWorklist(LN);
1428         AddToWorklist(LN);
1429       }
1430       if (!NIsValid)
1431         continue;
1432     }
1433
1434     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1435
1436     // Add any operands of the new node which have not yet been combined to the
1437     // worklist as well. Because the worklist uniques things already, this
1438     // won't repeatedly process the same operand.
1439     CombinedNodes.insert(N);
1440     for (const SDValue &ChildN : N->op_values())
1441       if (!CombinedNodes.count(ChildN.getNode()))
1442         AddToWorklist(ChildN.getNode());
1443
1444     SDValue RV = combine(N);
1445
1446     if (!RV.getNode())
1447       continue;
1448
1449     ++NodesCombined;
1450
1451     // If we get back the same node we passed in, rather than a new node or
1452     // zero, we know that the node must have defined multiple values and
1453     // CombineTo was used.  Since CombineTo takes care of the worklist
1454     // mechanics for us, we have no work to do in this case.
1455     if (RV.getNode() == N)
1456       continue;
1457
1458     assert(N->getOpcode() != ISD::DELETED_NODE &&
1459            RV.getOpcode() != ISD::DELETED_NODE &&
1460            "Node was deleted but visit returned new node!");
1461
1462     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1463
1464     if (N->getNumValues() == RV.getNode()->getNumValues())
1465       DAG.ReplaceAllUsesWith(N, RV.getNode());
1466     else {
1467       assert(N->getValueType(0) == RV.getValueType() &&
1468              N->getNumValues() == 1 && "Type mismatch");
1469       DAG.ReplaceAllUsesWith(N, &RV);
1470     }
1471
1472     // Push the new node and any users onto the worklist
1473     AddToWorklist(RV.getNode());
1474     AddUsersToWorklist(RV.getNode());
1475
1476     // Finally, if the node is now dead, remove it from the graph.  The node
1477     // may not be dead if the replacement process recursively simplified to
1478     // something else needing this node. This will also take care of adding any
1479     // operands which have lost a user to the worklist.
1480     recursivelyDeleteUnusedNodes(N);
1481   }
1482
1483   // If the root changed (e.g. it was a dead load, update the root).
1484   DAG.setRoot(Dummy.getValue());
1485   DAG.RemoveDeadNodes();
1486 }
1487
1488 SDValue DAGCombiner::visit(SDNode *N) {
1489   switch (N->getOpcode()) {
1490   default: break;
1491   case ISD::TokenFactor:        return visitTokenFactor(N);
1492   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1493   case ISD::ADD:                return visitADD(N);
1494   case ISD::SUB:                return visitSUB(N);
1495   case ISD::SADDSAT:
1496   case ISD::UADDSAT:            return visitADDSAT(N);
1497   case ISD::SSUBSAT:
1498   case ISD::USUBSAT:            return visitSUBSAT(N);
1499   case ISD::ADDC:               return visitADDC(N);
1500   case ISD::SADDO:
1501   case ISD::UADDO:              return visitADDO(N);
1502   case ISD::SUBC:               return visitSUBC(N);
1503   case ISD::SSUBO:
1504   case ISD::USUBO:              return visitSUBO(N);
1505   case ISD::ADDE:               return visitADDE(N);
1506   case ISD::ADDCARRY:           return visitADDCARRY(N);
1507   case ISD::SUBE:               return visitSUBE(N);
1508   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1509   case ISD::SMULFIX:
1510   case ISD::SMULFIXSAT:
1511   case ISD::UMULFIX:
1512   case ISD::UMULFIXSAT:         return visitMULFIX(N);
1513   case ISD::MUL:                return visitMUL(N);
1514   case ISD::SDIV:               return visitSDIV(N);
1515   case ISD::UDIV:               return visitUDIV(N);
1516   case ISD::SREM:
1517   case ISD::UREM:               return visitREM(N);
1518   case ISD::MULHU:              return visitMULHU(N);
1519   case ISD::MULHS:              return visitMULHS(N);
1520   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1521   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1522   case ISD::SMULO:
1523   case ISD::UMULO:              return visitMULO(N);
1524   case ISD::SMIN:
1525   case ISD::SMAX:
1526   case ISD::UMIN:
1527   case ISD::UMAX:               return visitIMINMAX(N);
1528   case ISD::AND:                return visitAND(N);
1529   case ISD::OR:                 return visitOR(N);
1530   case ISD::XOR:                return visitXOR(N);
1531   case ISD::SHL:                return visitSHL(N);
1532   case ISD::SRA:                return visitSRA(N);
1533   case ISD::SRL:                return visitSRL(N);
1534   case ISD::ROTR:
1535   case ISD::ROTL:               return visitRotate(N);
1536   case ISD::FSHL:
1537   case ISD::FSHR:               return visitFunnelShift(N);
1538   case ISD::ABS:                return visitABS(N);
1539   case ISD::BSWAP:              return visitBSWAP(N);
1540   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1541   case ISD::CTLZ:               return visitCTLZ(N);
1542   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1543   case ISD::CTTZ:               return visitCTTZ(N);
1544   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1545   case ISD::CTPOP:              return visitCTPOP(N);
1546   case ISD::SELECT:             return visitSELECT(N);
1547   case ISD::VSELECT:            return visitVSELECT(N);
1548   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1549   case ISD::SETCC:              return visitSETCC(N);
1550   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1551   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1552   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1553   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1554   case ISD::AssertSext:
1555   case ISD::AssertZext:         return visitAssertExt(N);
1556   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1557   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1558   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1559   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1560   case ISD::BITCAST:            return visitBITCAST(N);
1561   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1562   case ISD::FADD:               return visitFADD(N);
1563   case ISD::FSUB:               return visitFSUB(N);
1564   case ISD::FMUL:               return visitFMUL(N);
1565   case ISD::FMA:                return visitFMA(N);
1566   case ISD::FDIV:               return visitFDIV(N);
1567   case ISD::FREM:               return visitFREM(N);
1568   case ISD::FSQRT:              return visitFSQRT(N);
1569   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1570   case ISD::FPOW:               return visitFPOW(N);
1571   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1572   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1573   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1574   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1575   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1576   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1577   case ISD::FNEG:               return visitFNEG(N);
1578   case ISD::FABS:               return visitFABS(N);
1579   case ISD::FFLOOR:             return visitFFLOOR(N);
1580   case ISD::FMINNUM:            return visitFMINNUM(N);
1581   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1582   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1583   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1584   case ISD::FCEIL:              return visitFCEIL(N);
1585   case ISD::FTRUNC:             return visitFTRUNC(N);
1586   case ISD::BRCOND:             return visitBRCOND(N);
1587   case ISD::BR_CC:              return visitBR_CC(N);
1588   case ISD::LOAD:               return visitLOAD(N);
1589   case ISD::STORE:              return visitSTORE(N);
1590   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1591   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1592   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1593   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1594   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1595   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1596   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1597   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1598   case ISD::MGATHER:            return visitMGATHER(N);
1599   case ISD::MLOAD:              return visitMLOAD(N);
1600   case ISD::MSCATTER:           return visitMSCATTER(N);
1601   case ISD::MSTORE:             return visitMSTORE(N);
1602   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1603   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1604   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1605   case ISD::VECREDUCE_FADD:
1606   case ISD::VECREDUCE_FMUL:
1607   case ISD::VECREDUCE_ADD:
1608   case ISD::VECREDUCE_MUL:
1609   case ISD::VECREDUCE_AND:
1610   case ISD::VECREDUCE_OR:
1611   case ISD::VECREDUCE_XOR:
1612   case ISD::VECREDUCE_SMAX:
1613   case ISD::VECREDUCE_SMIN:
1614   case ISD::VECREDUCE_UMAX:
1615   case ISD::VECREDUCE_UMIN:
1616   case ISD::VECREDUCE_FMAX:
1617   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1618   }
1619   return SDValue();
1620 }
1621
1622 SDValue DAGCombiner::combine(SDNode *N) {
1623   SDValue RV = visit(N);
1624
1625   // If nothing happened, try a target-specific DAG combine.
1626   if (!RV.getNode()) {
1627     assert(N->getOpcode() != ISD::DELETED_NODE &&
1628            "Node was deleted but visit returned NULL!");
1629
1630     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1631         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1632
1633       // Expose the DAG combiner to the target combiner impls.
1634       TargetLowering::DAGCombinerInfo
1635         DagCombineInfo(DAG, Level, false, this);
1636
1637       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1638     }
1639   }
1640
1641   // If nothing happened still, try promoting the operation.
1642   if (!RV.getNode()) {
1643     switch (N->getOpcode()) {
1644     default: break;
1645     case ISD::ADD:
1646     case ISD::SUB:
1647     case ISD::MUL:
1648     case ISD::AND:
1649     case ISD::OR:
1650     case ISD::XOR:
1651       RV = PromoteIntBinOp(SDValue(N, 0));
1652       break;
1653     case ISD::SHL:
1654     case ISD::SRA:
1655     case ISD::SRL:
1656       RV = PromoteIntShiftOp(SDValue(N, 0));
1657       break;
1658     case ISD::SIGN_EXTEND:
1659     case ISD::ZERO_EXTEND:
1660     case ISD::ANY_EXTEND:
1661       RV = PromoteExtend(SDValue(N, 0));
1662       break;
1663     case ISD::LOAD:
1664       if (PromoteLoad(SDValue(N, 0)))
1665         RV = SDValue(N, 0);
1666       break;
1667     }
1668   }
1669
1670   // If N is a commutative binary node, try to eliminate it if the commuted
1671   // version is already present in the DAG.
1672   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1673       N->getNumValues() == 1) {
1674     SDValue N0 = N->getOperand(0);
1675     SDValue N1 = N->getOperand(1);
1676
1677     // Constant operands are canonicalized to RHS.
1678     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1679       SDValue Ops[] = {N1, N0};
1680       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1681                                             N->getFlags());
1682       if (CSENode)
1683         return SDValue(CSENode, 0);
1684     }
1685   }
1686
1687   return RV;
1688 }
1689
1690 /// Given a node, return its input chain if it has one, otherwise return a null
1691 /// sd operand.
1692 static SDValue getInputChainForNode(SDNode *N) {
1693   if (unsigned NumOps = N->getNumOperands()) {
1694     if (N->getOperand(0).getValueType() == MVT::Other)
1695       return N->getOperand(0);
1696     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1697       return N->getOperand(NumOps-1);
1698     for (unsigned i = 1; i < NumOps-1; ++i)
1699       if (N->getOperand(i).getValueType() == MVT::Other)
1700         return N->getOperand(i);
1701   }
1702   return SDValue();
1703 }
1704
1705 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1706   // If N has two operands, where one has an input chain equal to the other,
1707   // the 'other' chain is redundant.
1708   if (N->getNumOperands() == 2) {
1709     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1710       return N->getOperand(0);
1711     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1712       return N->getOperand(1);
1713   }
1714
1715   // Don't simplify token factors if optnone.
1716   if (OptLevel == CodeGenOpt::None)
1717     return SDValue();
1718
1719   // If the sole user is a token factor, we should make sure we have a
1720   // chance to merge them together. This prevents TF chains from inhibiting
1721   // optimizations.
1722   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1723     AddToWorklist(*(N->use_begin()));
1724
1725   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1726   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1727   SmallPtrSet<SDNode*, 16> SeenOps;
1728   bool Changed = false;             // If we should replace this token factor.
1729
1730   // Start out with this token factor.
1731   TFs.push_back(N);
1732
1733   // Iterate through token factors.  The TFs grows when new token factors are
1734   // encountered.
1735   for (unsigned i = 0; i < TFs.size(); ++i) {
1736     // Limit number of nodes to inline, to avoid quadratic compile times.
1737     // We have to add the outstanding Token Factors to Ops, otherwise we might
1738     // drop Ops from the resulting Token Factors.
1739     if (Ops.size() > TokenFactorInlineLimit) {
1740       for (unsigned j = i; j < TFs.size(); j++)
1741         Ops.emplace_back(TFs[j], 0);
1742       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1743       // combiner worklist later.
1744       TFs.resize(i);
1745       break;
1746     }
1747
1748     SDNode *TF = TFs[i];
1749     // Check each of the operands.
1750     for (const SDValue &Op : TF->op_values()) {
1751       switch (Op.getOpcode()) {
1752       case ISD::EntryToken:
1753         // Entry tokens don't need to be added to the list. They are
1754         // redundant.
1755         Changed = true;
1756         break;
1757
1758       case ISD::TokenFactor:
1759         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
1760           // Queue up for processing.
1761           TFs.push_back(Op.getNode());
1762           Changed = true;
1763           break;
1764         }
1765         LLVM_FALLTHROUGH;
1766
1767       default:
1768         // Only add if it isn't already in the list.
1769         if (SeenOps.insert(Op.getNode()).second)
1770           Ops.push_back(Op);
1771         else
1772           Changed = true;
1773         break;
1774       }
1775     }
1776   }
1777
1778   // Re-visit inlined Token Factors, to clean them up in case they have been
1779   // removed. Skip the first Token Factor, as this is the current node.
1780   for (unsigned i = 1, e = TFs.size(); i < e; i++)
1781     AddToWorklist(TFs[i]);
1782
1783   // Remove Nodes that are chained to another node in the list. Do so
1784   // by walking up chains breath-first stopping when we've seen
1785   // another operand. In general we must climb to the EntryNode, but we can exit
1786   // early if we find all remaining work is associated with just one operand as
1787   // no further pruning is possible.
1788
1789   // List of nodes to search through and original Ops from which they originate.
1790   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
1791   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
1792   SmallPtrSet<SDNode *, 16> SeenChains;
1793   bool DidPruneOps = false;
1794
1795   unsigned NumLeftToConsider = 0;
1796   for (const SDValue &Op : Ops) {
1797     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
1798     OpWorkCount.push_back(1);
1799   }
1800
1801   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
1802     // If this is an Op, we can remove the op from the list. Remark any
1803     // search associated with it as from the current OpNumber.
1804     if (SeenOps.count(Op) != 0) {
1805       Changed = true;
1806       DidPruneOps = true;
1807       unsigned OrigOpNumber = 0;
1808       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
1809         OrigOpNumber++;
1810       assert((OrigOpNumber != Ops.size()) &&
1811              "expected to find TokenFactor Operand");
1812       // Re-mark worklist from OrigOpNumber to OpNumber
1813       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
1814         if (Worklist[i].second == OrigOpNumber) {
1815           Worklist[i].second = OpNumber;
1816         }
1817       }
1818       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
1819       OpWorkCount[OrigOpNumber] = 0;
1820       NumLeftToConsider--;
1821     }
1822     // Add if it's a new chain
1823     if (SeenChains.insert(Op).second) {
1824       OpWorkCount[OpNumber]++;
1825       Worklist.push_back(std::make_pair(Op, OpNumber));
1826     }
1827   };
1828
1829   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
1830     // We need at least be consider at least 2 Ops to prune.
1831     if (NumLeftToConsider <= 1)
1832       break;
1833     auto CurNode = Worklist[i].first;
1834     auto CurOpNumber = Worklist[i].second;
1835     assert((OpWorkCount[CurOpNumber] > 0) &&
1836            "Node should not appear in worklist");
1837     switch (CurNode->getOpcode()) {
1838     case ISD::EntryToken:
1839       // Hitting EntryToken is the only way for the search to terminate without
1840       // hitting
1841       // another operand's search. Prevent us from marking this operand
1842       // considered.
1843       NumLeftToConsider++;
1844       break;
1845     case ISD::TokenFactor:
1846       for (const SDValue &Op : CurNode->op_values())
1847         AddToWorklist(i, Op.getNode(), CurOpNumber);
1848       break;
1849     case ISD::LIFETIME_START:
1850     case ISD::LIFETIME_END:
1851     case ISD::CopyFromReg:
1852     case ISD::CopyToReg:
1853       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
1854       break;
1855     default:
1856       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
1857         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
1858       break;
1859     }
1860     OpWorkCount[CurOpNumber]--;
1861     if (OpWorkCount[CurOpNumber] == 0)
1862       NumLeftToConsider--;
1863   }
1864
1865   // If we've changed things around then replace token factor.
1866   if (Changed) {
1867     SDValue Result;
1868     if (Ops.empty()) {
1869       // The entry token is the only possible outcome.
1870       Result = DAG.getEntryNode();
1871     } else {
1872       if (DidPruneOps) {
1873         SmallVector<SDValue, 8> PrunedOps;
1874         //
1875         for (const SDValue &Op : Ops) {
1876           if (SeenChains.count(Op.getNode()) == 0)
1877             PrunedOps.push_back(Op);
1878         }
1879         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
1880       } else {
1881         Result = DAG.getTokenFactor(SDLoc(N), Ops);
1882       }
1883     }
1884     return Result;
1885   }
1886   return SDValue();
1887 }
1888
1889 /// MERGE_VALUES can always be eliminated.
1890 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1891   WorklistRemover DeadNodes(*this);
1892   // Replacing results may cause a different MERGE_VALUES to suddenly
1893   // be CSE'd with N, and carry its uses with it. Iterate until no
1894   // uses remain, to ensure that the node can be safely deleted.
1895   // First add the users of this node to the work list so that they
1896   // can be tried again once they have new operands.
1897   AddUsersToWorklist(N);
1898   do {
1899     // Do as a single replacement to avoid rewalking use lists.
1900     SmallVector<SDValue, 8> Ops;
1901     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1902       Ops.push_back(N->getOperand(i));
1903     DAG.ReplaceAllUsesWith(N, Ops.data());
1904   } while (!N->use_empty());
1905   deleteAndRecombine(N);
1906   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1907 }
1908
1909 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
1910 /// ConstantSDNode pointer else nullptr.
1911 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
1912   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
1913   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
1914 }
1915
1916 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
1917   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
1918          "Unexpected binary operator");
1919
1920   // Don't do this unless the old select is going away. We want to eliminate the
1921   // binary operator, not replace a binop with a select.
1922   // TODO: Handle ISD::SELECT_CC.
1923   unsigned SelOpNo = 0;
1924   SDValue Sel = BO->getOperand(0);
1925   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
1926     SelOpNo = 1;
1927     Sel = BO->getOperand(1);
1928   }
1929
1930   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
1931     return SDValue();
1932
1933   SDValue CT = Sel.getOperand(1);
1934   if (!isConstantOrConstantVector(CT, true) &&
1935       !isConstantFPBuildVectorOrConstantFP(CT))
1936     return SDValue();
1937
1938   SDValue CF = Sel.getOperand(2);
1939   if (!isConstantOrConstantVector(CF, true) &&
1940       !isConstantFPBuildVectorOrConstantFP(CF))
1941     return SDValue();
1942
1943   // Bail out if any constants are opaque because we can't constant fold those.
1944   // The exception is "and" and "or" with either 0 or -1 in which case we can
1945   // propagate non constant operands into select. I.e.:
1946   // and (select Cond, 0, -1), X --> select Cond, 0, X
1947   // or X, (select Cond, -1, 0) --> select Cond, -1, X
1948   auto BinOpcode = BO->getOpcode();
1949   bool CanFoldNonConst =
1950       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
1951       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
1952       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
1953
1954   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
1955   if (!CanFoldNonConst &&
1956       !isConstantOrConstantVector(CBO, true) &&
1957       !isConstantFPBuildVectorOrConstantFP(CBO))
1958     return SDValue();
1959
1960   EVT VT = Sel.getValueType();
1961
1962   // In case of shift value and shift amount may have different VT. For instance
1963   // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
1964   // swapped operands and value types do not match. NB: x86 is fine if operands
1965   // are not swapped with shift amount VT being not bigger than shifted value.
1966   // TODO: that is possible to check for a shift operation, correct VTs and
1967   // still perform optimization on x86 if needed.
1968   if (SelOpNo && VT != CBO.getValueType())
1969     return SDValue();
1970
1971   // We have a select-of-constants followed by a binary operator with a
1972   // constant. Eliminate the binop by pulling the constant math into the select.
1973   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
1974   SDLoc DL(Sel);
1975   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
1976                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
1977   if (!CanFoldNonConst && !NewCT.isUndef() &&
1978       !isConstantOrConstantVector(NewCT, true) &&
1979       !isConstantFPBuildVectorOrConstantFP(NewCT))
1980     return SDValue();
1981
1982   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
1983                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
1984   if (!CanFoldNonConst && !NewCF.isUndef() &&
1985       !isConstantOrConstantVector(NewCF, true) &&
1986       !isConstantFPBuildVectorOrConstantFP(NewCF))
1987     return SDValue();
1988
1989   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
1990   SelectOp->setFlags(BO->getFlags());
1991   return SelectOp;
1992 }
1993
1994 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
1995   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
1996          "Expecting add or sub");
1997
1998   // Match a constant operand and a zext operand for the math instruction:
1999   // add Z, C
2000   // sub C, Z
2001   bool IsAdd = N->getOpcode() == ISD::ADD;
2002   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2003   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2004   auto *CN = dyn_cast<ConstantSDNode>(C);
2005   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2006     return SDValue();
2007
2008   // Match the zext operand as a setcc of a boolean.
2009   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2010       Z.getOperand(0).getValueType() != MVT::i1)
2011     return SDValue();
2012
2013   // Match the compare as: setcc (X & 1), 0, eq.
2014   SDValue SetCC = Z.getOperand(0);
2015   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2016   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2017       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2018       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2019     return SDValue();
2020
2021   // We are adding/subtracting a constant and an inverted low bit. Turn that
2022   // into a subtract/add of the low bit with incremented/decremented constant:
2023   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2024   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2025   EVT VT = C.getValueType();
2026   SDLoc DL(N);
2027   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2028   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2029                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2030   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2031 }
2032
2033 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2034 /// a shift and add with a different constant.
2035 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2036   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2037          "Expecting add or sub");
2038
2039   // We need a constant operand for the add/sub, and the other operand is a
2040   // logical shift right: add (srl), C or sub C, (srl).
2041   // TODO - support non-uniform vector amounts.
2042   bool IsAdd = N->getOpcode() == ISD::ADD;
2043   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2044   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2045   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2046   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2047     return SDValue();
2048
2049   // The shift must be of a 'not' value.
2050   SDValue Not = ShiftOp.getOperand(0);
2051   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2052     return SDValue();
2053
2054   // The shift must be moving the sign bit to the least-significant-bit.
2055   EVT VT = ShiftOp.getValueType();
2056   SDValue ShAmt = ShiftOp.getOperand(1);
2057   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2058   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2059     return SDValue();
2060
2061   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2062   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2063   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2064   SDLoc DL(N);
2065   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2066   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2067   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2068   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2069 }
2070
2071 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2072 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2073 /// are no common bits set in the operands).
2074 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2075   SDValue N0 = N->getOperand(0);
2076   SDValue N1 = N->getOperand(1);
2077   EVT VT = N0.getValueType();
2078   SDLoc DL(N);
2079
2080   // fold vector ops
2081   if (VT.isVector()) {
2082     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2083       return FoldedVOp;
2084
2085     // fold (add x, 0) -> x, vector edition
2086     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2087       return N0;
2088     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2089       return N1;
2090   }
2091
2092   // fold (add x, undef) -> undef
2093   if (N0.isUndef())
2094     return N0;
2095
2096   if (N1.isUndef())
2097     return N1;
2098
2099   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2100     // canonicalize constant to RHS
2101     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2102       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2103     // fold (add c1, c2) -> c1+c2
2104     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2105                                       N1.getNode());
2106   }
2107
2108   // fold (add x, 0) -> x
2109   if (isNullConstant(N1))
2110     return N0;
2111
2112   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2113     // fold ((A-c1)+c2) -> (A+(c2-c1))
2114     if (N0.getOpcode() == ISD::SUB &&
2115         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2116       SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2117                                                N0.getOperand(1).getNode());
2118       assert(Sub && "Constant folding failed");
2119       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2120     }
2121
2122     // fold ((c1-A)+c2) -> (c1+c2)-A
2123     if (N0.getOpcode() == ISD::SUB &&
2124         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2125       SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2126                                                N0.getOperand(0).getNode());
2127       assert(Add && "Constant folding failed");
2128       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2129     }
2130
2131     // add (sext i1 X), 1 -> zext (not i1 X)
2132     // We don't transform this pattern:
2133     //   add (zext i1 X), -1 -> sext (not i1 X)
2134     // because most (?) targets generate better code for the zext form.
2135     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2136         isOneOrOneSplat(N1)) {
2137       SDValue X = N0.getOperand(0);
2138       if ((!LegalOperations ||
2139            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2140             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2141           X.getScalarValueSizeInBits() == 1) {
2142         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2143         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2144       }
2145     }
2146
2147     // Undo the add -> or combine to merge constant offsets from a frame index.
2148     if (N0.getOpcode() == ISD::OR &&
2149         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2150         isa<ConstantSDNode>(N0.getOperand(1)) &&
2151         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2152       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2153       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2154     }
2155   }
2156
2157   if (SDValue NewSel = foldBinOpIntoSelect(N))
2158     return NewSel;
2159
2160   // reassociate add
2161   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2162     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2163       return RADD;
2164   }
2165   // fold ((0-A) + B) -> B-A
2166   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2167     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2168
2169   // fold (A + (0-B)) -> A-B
2170   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2171     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2172
2173   // fold (A+(B-A)) -> B
2174   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2175     return N1.getOperand(0);
2176
2177   // fold ((B-A)+A) -> B
2178   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2179     return N0.getOperand(0);
2180
2181   // fold ((A-B)+(C-A)) -> (C-B)
2182   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2183       N0.getOperand(0) == N1.getOperand(1))
2184     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2185                        N0.getOperand(1));
2186
2187   // fold ((A-B)+(B-C)) -> (A-C)
2188   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2189       N0.getOperand(1) == N1.getOperand(0))
2190     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2191                        N1.getOperand(1));
2192
2193   // fold (A+(B-(A+C))) to (B-C)
2194   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2195       N0 == N1.getOperand(1).getOperand(0))
2196     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2197                        N1.getOperand(1).getOperand(1));
2198
2199   // fold (A+(B-(C+A))) to (B-C)
2200   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2201       N0 == N1.getOperand(1).getOperand(1))
2202     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2203                        N1.getOperand(1).getOperand(0));
2204
2205   // fold (A+((B-A)+or-C)) to (B+or-C)
2206   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2207       N1.getOperand(0).getOpcode() == ISD::SUB &&
2208       N0 == N1.getOperand(0).getOperand(1))
2209     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2210                        N1.getOperand(1));
2211
2212   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2213   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2214     SDValue N00 = N0.getOperand(0);
2215     SDValue N01 = N0.getOperand(1);
2216     SDValue N10 = N1.getOperand(0);
2217     SDValue N11 = N1.getOperand(1);
2218
2219     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2220       return DAG.getNode(ISD::SUB, DL, VT,
2221                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2222                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2223   }
2224
2225   // fold (add (umax X, C), -C) --> (usubsat X, C)
2226   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2227     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2228       return (!Max && !Op) ||
2229              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2230     };
2231     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2232                                   /*AllowUndefs*/ true))
2233       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2234                          N0.getOperand(1));
2235   }
2236
2237   if (SimplifyDemandedBits(SDValue(N, 0)))
2238     return SDValue(N, 0);
2239
2240   if (isOneOrOneSplat(N1)) {
2241     // fold (add (xor a, -1), 1) -> (sub 0, a)
2242     if (isBitwiseNot(N0))
2243       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2244                          N0.getOperand(0));
2245
2246     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2247     if (N0.getOpcode() == ISD::ADD ||
2248         N0.getOpcode() == ISD::UADDO ||
2249         N0.getOpcode() == ISD::SADDO) {
2250       SDValue A, Xor;
2251
2252       if (isBitwiseNot(N0.getOperand(0))) {
2253         A = N0.getOperand(1);
2254         Xor = N0.getOperand(0);
2255       } else if (isBitwiseNot(N0.getOperand(1))) {
2256         A = N0.getOperand(0);
2257         Xor = N0.getOperand(1);
2258       }
2259
2260       if (Xor)
2261         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2262     }
2263
2264     // Look for:
2265     //   add (add x, y), 1
2266     // And if the target does not like this form then turn into:
2267     //   sub y, (xor x, -1)
2268     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2269         N0.getOpcode() == ISD::ADD) {
2270       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2271                                 DAG.getAllOnesConstant(DL, VT));
2272       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2273     }
2274   }
2275
2276   // (x - y) + -1  ->  add (xor y, -1), x
2277   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2278       isAllOnesOrAllOnesSplat(N1)) {
2279     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2280     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2281   }
2282
2283   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2284     return Combined;
2285
2286   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2287     return Combined;
2288
2289   return SDValue();
2290 }
2291
2292 SDValue DAGCombiner::visitADD(SDNode *N) {
2293   SDValue N0 = N->getOperand(0);
2294   SDValue N1 = N->getOperand(1);
2295   EVT VT = N0.getValueType();
2296   SDLoc DL(N);
2297
2298   if (SDValue Combined = visitADDLike(N))
2299     return Combined;
2300
2301   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2302     return V;
2303
2304   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2305     return V;
2306
2307   // fold (a+b) -> (a|b) iff a and b share no bits.
2308   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2309       DAG.haveNoCommonBitsSet(N0, N1))
2310     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2311
2312   return SDValue();
2313 }
2314
2315 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2316   unsigned Opcode = N->getOpcode();
2317   SDValue N0 = N->getOperand(0);
2318   SDValue N1 = N->getOperand(1);
2319   EVT VT = N0.getValueType();
2320   SDLoc DL(N);
2321
2322   // fold vector ops
2323   if (VT.isVector()) {
2324     // TODO SimplifyVBinOp
2325
2326     // fold (add_sat x, 0) -> x, vector edition
2327     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2328       return N0;
2329     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2330       return N1;
2331   }
2332
2333   // fold (add_sat x, undef) -> -1
2334   if (N0.isUndef() || N1.isUndef())
2335     return DAG.getAllOnesConstant(DL, VT);
2336
2337   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2338     // canonicalize constant to RHS
2339     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2340       return DAG.getNode(Opcode, DL, VT, N1, N0);
2341     // fold (add_sat c1, c2) -> c3
2342     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2343                                       N1.getNode());
2344   }
2345
2346   // fold (add_sat x, 0) -> x
2347   if (isNullConstant(N1))
2348     return N0;
2349
2350   // If it cannot overflow, transform into an add.
2351   if (Opcode == ISD::UADDSAT)
2352     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2353       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2354
2355   return SDValue();
2356 }
2357
2358 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2359   bool Masked = false;
2360
2361   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2362   while (true) {
2363     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2364       V = V.getOperand(0);
2365       continue;
2366     }
2367
2368     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2369       Masked = true;
2370       V = V.getOperand(0);
2371       continue;
2372     }
2373
2374     break;
2375   }
2376
2377   // If this is not a carry, return.
2378   if (V.getResNo() != 1)
2379     return SDValue();
2380
2381   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2382       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2383     return SDValue();
2384
2385   EVT VT = V.getNode()->getValueType(0);
2386   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2387     return SDValue();
2388
2389   // If the result is masked, then no matter what kind of bool it is we can
2390   // return. If it isn't, then we need to make sure the bool type is either 0 or
2391   // 1 and not other values.
2392   if (Masked ||
2393       TLI.getBooleanContents(V.getValueType()) ==
2394           TargetLoweringBase::ZeroOrOneBooleanContent)
2395     return V;
2396
2397   return SDValue();
2398 }
2399
2400 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2401 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2402 /// the opcode and bypass the mask operation.
2403 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2404                                  SelectionDAG &DAG, const SDLoc &DL) {
2405   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2406     return SDValue();
2407
2408   EVT VT = N0.getValueType();
2409   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2410     return SDValue();
2411
2412   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2413   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2414   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2415 }
2416
2417 /// Helper for doing combines based on N0 and N1 being added to each other.
2418 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2419                                           SDNode *LocReference) {
2420   EVT VT = N0.getValueType();
2421   SDLoc DL(LocReference);
2422
2423   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2424   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2425       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2426     return DAG.getNode(ISD::SUB, DL, VT, N0,
2427                        DAG.getNode(ISD::SHL, DL, VT,
2428                                    N1.getOperand(0).getOperand(1),
2429                                    N1.getOperand(1)));
2430
2431   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2432     return V;
2433
2434   // Look for:
2435   //   add (add x, 1), y
2436   // And if the target does not like this form then turn into:
2437   //   sub y, (xor x, -1)
2438   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2439       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2440     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2441                               DAG.getAllOnesConstant(DL, VT));
2442     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2443   }
2444
2445   // Hoist one-use subtraction by non-opaque constant:
2446   //   (x - C) + y  ->  (x + y) - C
2447   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2448   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2449       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2450     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2451     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2452   }
2453   // Hoist one-use subtraction from non-opaque constant:
2454   //   (C - x) + y  ->  (y - x) + C
2455   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2456       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2457     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2458     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2459   }
2460
2461   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2462   // rather than 'add 0/-1' (the zext should get folded).
2463   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2464   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2465       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2466       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2467     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2468     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2469   }
2470
2471   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2472   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2473     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2474     if (TN->getVT() == MVT::i1) {
2475       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2476                                  DAG.getConstant(1, DL, VT));
2477       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2478     }
2479   }
2480
2481   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2482   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2483       N1.getResNo() == 0)
2484     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2485                        N0, N1.getOperand(0), N1.getOperand(2));
2486
2487   // (add X, Carry) -> (addcarry X, 0, Carry)
2488   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2489     if (SDValue Carry = getAsCarry(TLI, N1))
2490       return DAG.getNode(ISD::ADDCARRY, DL,
2491                          DAG.getVTList(VT, Carry.getValueType()), N0,
2492                          DAG.getConstant(0, DL, VT), Carry);
2493
2494   return SDValue();
2495 }
2496
2497 SDValue DAGCombiner::visitADDC(SDNode *N) {
2498   SDValue N0 = N->getOperand(0);
2499   SDValue N1 = N->getOperand(1);
2500   EVT VT = N0.getValueType();
2501   SDLoc DL(N);
2502
2503   // If the flag result is dead, turn this into an ADD.
2504   if (!N->hasAnyUseOfValue(1))
2505     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2506                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2507
2508   // canonicalize constant to RHS.
2509   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2510   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2511   if (N0C && !N1C)
2512     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2513
2514   // fold (addc x, 0) -> x + no carry out
2515   if (isNullConstant(N1))
2516     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2517                                         DL, MVT::Glue));
2518
2519   // If it cannot overflow, transform into an add.
2520   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2521     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2522                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2523
2524   return SDValue();
2525 }
2526
2527 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2528                            SelectionDAG &DAG, const TargetLowering &TLI) {
2529   EVT VT = V.getValueType();
2530
2531   SDValue Cst;
2532   switch (TLI.getBooleanContents(VT)) {
2533   case TargetLowering::ZeroOrOneBooleanContent:
2534   case TargetLowering::UndefinedBooleanContent:
2535     Cst = DAG.getConstant(1, DL, VT);
2536     break;
2537   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2538     Cst = DAG.getAllOnesConstant(DL, VT);
2539     break;
2540   }
2541
2542   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2543 }
2544
2545 /**
2546  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2547  * then the flip also occurs if computing the inverse is the same cost.
2548  * This function returns an empty SDValue in case it cannot flip the boolean
2549  * without increasing the cost of the computation. If you want to flip a boolean
2550  * no matter what, use flipBoolean.
2551  */
2552 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2553                                   const TargetLowering &TLI,
2554                                   bool Force) {
2555   if (Force && isa<ConstantSDNode>(V))
2556     return flipBoolean(V, SDLoc(V), DAG, TLI);
2557
2558   if (V.getOpcode() != ISD::XOR)
2559     return SDValue();
2560
2561   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2562   if (!Const)
2563     return SDValue();
2564
2565   EVT VT = V.getValueType();
2566
2567   bool IsFlip = false;
2568   switch(TLI.getBooleanContents(VT)) {
2569     case TargetLowering::ZeroOrOneBooleanContent:
2570       IsFlip = Const->isOne();
2571       break;
2572     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2573       IsFlip = Const->isAllOnesValue();
2574       break;
2575     case TargetLowering::UndefinedBooleanContent:
2576       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2577       break;
2578   }
2579
2580   if (IsFlip)
2581     return V.getOperand(0);
2582   if (Force)
2583     return flipBoolean(V, SDLoc(V), DAG, TLI);
2584   return SDValue();
2585 }
2586
2587 SDValue DAGCombiner::visitADDO(SDNode *N) {
2588   SDValue N0 = N->getOperand(0);
2589   SDValue N1 = N->getOperand(1);
2590   EVT VT = N0.getValueType();
2591   bool IsSigned = (ISD::SADDO == N->getOpcode());
2592
2593   EVT CarryVT = N->getValueType(1);
2594   SDLoc DL(N);
2595
2596   // If the flag result is dead, turn this into an ADD.
2597   if (!N->hasAnyUseOfValue(1))
2598     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2599                      DAG.getUNDEF(CarryVT));
2600
2601   // canonicalize constant to RHS.
2602   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2603       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2604     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2605
2606   // fold (addo x, 0) -> x + no carry out
2607   if (isNullOrNullSplat(N1))
2608     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2609
2610   if (!IsSigned) {
2611     // If it cannot overflow, transform into an add.
2612     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2613       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2614                        DAG.getConstant(0, DL, CarryVT));
2615
2616     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2617     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2618       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2619                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2620       return CombineTo(N, Sub,
2621                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2622     }
2623
2624     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2625       return Combined;
2626
2627     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2628       return Combined;
2629   }
2630
2631   return SDValue();
2632 }
2633
2634 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2635   EVT VT = N0.getValueType();
2636   if (VT.isVector())
2637     return SDValue();
2638
2639   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2640   // If Y + 1 cannot overflow.
2641   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2642     SDValue Y = N1.getOperand(0);
2643     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2644     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2645       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2646                          N1.getOperand(2));
2647   }
2648
2649   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2650   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2651     if (SDValue Carry = getAsCarry(TLI, N1))
2652       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2653                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2654
2655   return SDValue();
2656 }
2657
2658 SDValue DAGCombiner::visitADDE(SDNode *N) {
2659   SDValue N0 = N->getOperand(0);
2660   SDValue N1 = N->getOperand(1);
2661   SDValue CarryIn = N->getOperand(2);
2662
2663   // canonicalize constant to RHS
2664   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2665   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2666   if (N0C && !N1C)
2667     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2668                        N1, N0, CarryIn);
2669
2670   // fold (adde x, y, false) -> (addc x, y)
2671   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2672     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2673
2674   return SDValue();
2675 }
2676
2677 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2678   SDValue N0 = N->getOperand(0);
2679   SDValue N1 = N->getOperand(1);
2680   SDValue CarryIn = N->getOperand(2);
2681   SDLoc DL(N);
2682
2683   // canonicalize constant to RHS
2684   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2685   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2686   if (N0C && !N1C)
2687     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2688
2689   // fold (addcarry x, y, false) -> (uaddo x, y)
2690   if (isNullConstant(CarryIn)) {
2691     if (!LegalOperations ||
2692         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2693       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2694   }
2695
2696   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2697   if (isNullConstant(N0) && isNullConstant(N1)) {
2698     EVT VT = N0.getValueType();
2699     EVT CarryVT = CarryIn.getValueType();
2700     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2701     AddToWorklist(CarryExt.getNode());
2702     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2703                                     DAG.getConstant(1, DL, VT)),
2704                      DAG.getConstant(0, DL, CarryVT));
2705   }
2706
2707   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2708     return Combined;
2709
2710   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2711     return Combined;
2712
2713   return SDValue();
2714 }
2715
2716 /**
2717  * If we are facing some sort of diamond carry propapagtion pattern try to
2718  * break it up to generate something like:
2719  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2720  *
2721  * The end result is usually an increase in operation required, but because the
2722  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2723  *
2724  * Patterns typically look something like
2725  *            (uaddo A, B)
2726  *             /       \
2727  *          Carry      Sum
2728  *            |          \
2729  *            | (addcarry *, 0, Z)
2730  *            |       /
2731  *             \   Carry
2732  *              |   /
2733  * (addcarry X, *, *)
2734  *
2735  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2736  * produce a combine with a single path for carry propagation.
2737  */
2738 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2739                                       SDValue X, SDValue Carry0, SDValue Carry1,
2740                                       SDNode *N) {
2741   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2742     return SDValue();
2743   if (Carry1.getOpcode() != ISD::UADDO)
2744     return SDValue();
2745
2746   SDValue Z;
2747
2748   /**
2749    * First look for a suitable Z. It will present itself in the form of
2750    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2751    */
2752   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2753       isNullConstant(Carry0.getOperand(1))) {
2754     Z = Carry0.getOperand(2);
2755   } else if (Carry0.getOpcode() == ISD::UADDO &&
2756              isOneConstant(Carry0.getOperand(1))) {
2757     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2758     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
2759   } else {
2760     // We couldn't find a suitable Z.
2761     return SDValue();
2762   }
2763
2764
2765   auto cancelDiamond = [&](SDValue A,SDValue B) {
2766     SDLoc DL(N);
2767     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
2768     Combiner.AddToWorklist(NewY.getNode());
2769     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
2770                        DAG.getConstant(0, DL, X.getValueType()),
2771                        NewY.getValue(1));
2772   };
2773
2774   /**
2775    *      (uaddo A, B)
2776    *           |
2777    *          Sum
2778    *           |
2779    * (addcarry *, 0, Z)
2780    */
2781   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
2782     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
2783   }
2784
2785   /**
2786    * (addcarry A, 0, Z)
2787    *         |
2788    *        Sum
2789    *         |
2790    *  (uaddo *, B)
2791    */
2792   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
2793     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
2794   }
2795
2796   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
2797     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
2798   }
2799
2800   return SDValue();
2801 }
2802
2803 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
2804                                        SDNode *N) {
2805   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
2806   if (isBitwiseNot(N0))
2807     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
2808       SDLoc DL(N);
2809       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
2810                                 N0.getOperand(0), NotC);
2811       return CombineTo(N, Sub,
2812                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2813     }
2814
2815   // Iff the flag result is dead:
2816   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
2817   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
2818   // or the dependency between the instructions.
2819   if ((N0.getOpcode() == ISD::ADD ||
2820        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
2821         N0.getValue(1) != CarryIn)) &&
2822       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
2823     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
2824                        N0.getOperand(0), N0.getOperand(1), CarryIn);
2825
2826   /**
2827    * When one of the addcarry argument is itself a carry, we may be facing
2828    * a diamond carry propagation. In which case we try to transform the DAG
2829    * to ensure linear carry propagation if that is possible.
2830    */
2831   if (auto Y = getAsCarry(TLI, N1)) {
2832     // Because both are carries, Y and Z can be swapped.
2833     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
2834       return R;
2835     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
2836       return R;
2837   }
2838
2839   return SDValue();
2840 }
2841
2842 // Since it may not be valid to emit a fold to zero for vector initializers
2843 // check if we can before folding.
2844 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
2845                              SelectionDAG &DAG, bool LegalOperations) {
2846   if (!VT.isVector())
2847     return DAG.getConstant(0, DL, VT);
2848   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
2849     return DAG.getConstant(0, DL, VT);
2850   return SDValue();
2851 }
2852
2853 SDValue DAGCombiner::visitSUB(SDNode *N) {
2854   SDValue N0 = N->getOperand(0);
2855   SDValue N1 = N->getOperand(1);
2856   EVT VT = N0.getValueType();
2857   SDLoc DL(N);
2858
2859   // fold vector ops
2860   if (VT.isVector()) {
2861     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2862       return FoldedVOp;
2863
2864     // fold (sub x, 0) -> x, vector edition
2865     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2866       return N0;
2867   }
2868
2869   // fold (sub x, x) -> 0
2870   // FIXME: Refactor this and xor and other similar operations together.
2871   if (N0 == N1)
2872     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
2873   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2874       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
2875     // fold (sub c1, c2) -> c1-c2
2876     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2877                                       N1.getNode());
2878   }
2879
2880   if (SDValue NewSel = foldBinOpIntoSelect(N))
2881     return NewSel;
2882
2883   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
2884
2885   // fold (sub x, c) -> (add x, -c)
2886   if (N1C) {
2887     return DAG.getNode(ISD::ADD, DL, VT, N0,
2888                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
2889   }
2890
2891   if (isNullOrNullSplat(N0)) {
2892     unsigned BitWidth = VT.getScalarSizeInBits();
2893     // Right-shifting everything out but the sign bit followed by negation is
2894     // the same as flipping arithmetic/logical shift type without the negation:
2895     // -(X >>u 31) -> (X >>s 31)
2896     // -(X >>s 31) -> (X >>u 31)
2897     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
2898       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
2899       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
2900         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
2901         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
2902           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
2903       }
2904     }
2905
2906     // 0 - X --> 0 if the sub is NUW.
2907     if (N->getFlags().hasNoUnsignedWrap())
2908       return N0;
2909
2910     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
2911       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
2912       // N1 must be 0 because negating the minimum signed value is undefined.
2913       if (N->getFlags().hasNoSignedWrap())
2914         return N0;
2915
2916       // 0 - X --> X if X is 0 or the minimum signed value.
2917       return N1;
2918     }
2919   }
2920
2921   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
2922   if (isAllOnesOrAllOnesSplat(N0))
2923     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
2924
2925   // fold (A - (0-B)) -> A+B
2926   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2927     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
2928
2929   // fold A-(A-B) -> B
2930   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
2931     return N1.getOperand(1);
2932
2933   // fold (A+B)-A -> B
2934   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
2935     return N0.getOperand(1);
2936
2937   // fold (A+B)-B -> A
2938   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
2939     return N0.getOperand(0);
2940
2941   // fold (A+C1)-C2 -> A+(C1-C2)
2942   if (N0.getOpcode() == ISD::ADD &&
2943       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2944       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2945     SDValue NewC = DAG.FoldConstantArithmetic(
2946         ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2947     assert(NewC && "Constant folding failed");
2948     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
2949   }
2950
2951   // fold C2-(A+C1) -> (C2-C1)-A
2952   if (N1.getOpcode() == ISD::ADD) {
2953     SDValue N11 = N1.getOperand(1);
2954     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
2955         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
2956       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
2957                                                 N11.getNode());
2958       assert(NewC && "Constant folding failed");
2959       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
2960     }
2961   }
2962
2963   // fold (A-C1)-C2 -> A-(C1+C2)
2964   if (N0.getOpcode() == ISD::SUB &&
2965       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2966       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
2967     SDValue NewC = DAG.FoldConstantArithmetic(
2968         ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
2969     assert(NewC && "Constant folding failed");
2970     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
2971   }
2972
2973   // fold (c1-A)-c2 -> (c1-c2)-A
2974   if (N0.getOpcode() == ISD::SUB &&
2975       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
2976       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
2977     SDValue NewC = DAG.FoldConstantArithmetic(
2978         ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
2979     assert(NewC && "Constant folding failed");
2980     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
2981   }
2982
2983   // fold ((A+(B+or-C))-B) -> A+or-C
2984   if (N0.getOpcode() == ISD::ADD &&
2985       (N0.getOperand(1).getOpcode() == ISD::SUB ||
2986        N0.getOperand(1).getOpcode() == ISD::ADD) &&
2987       N0.getOperand(1).getOperand(0) == N1)
2988     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
2989                        N0.getOperand(1).getOperand(1));
2990
2991   // fold ((A+(C+B))-B) -> A+C
2992   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
2993       N0.getOperand(1).getOperand(1) == N1)
2994     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
2995                        N0.getOperand(1).getOperand(0));
2996
2997   // fold ((A-(B-C))-C) -> A-B
2998   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
2999       N0.getOperand(1).getOperand(1) == N1)
3000     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3001                        N0.getOperand(1).getOperand(0));
3002
3003   // fold (A-(B-C)) -> A+(C-B)
3004   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3005     return DAG.getNode(ISD::ADD, DL, VT, N0,
3006                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3007                                    N1.getOperand(0)));
3008
3009   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3010   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3011     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3012         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3013       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3014                                 N1.getOperand(0).getOperand(1),
3015                                 N1.getOperand(1));
3016       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3017     }
3018     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3019         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3020       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3021                                 N1.getOperand(0),
3022                                 N1.getOperand(1).getOperand(1));
3023       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3024     }
3025   }
3026
3027   // If either operand of a sub is undef, the result is undef
3028   if (N0.isUndef())
3029     return N0;
3030   if (N1.isUndef())
3031     return N1;
3032
3033   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3034     return V;
3035
3036   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3037     return V;
3038
3039   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3040     return V;
3041
3042   // (x - y) - 1  ->  add (xor y, -1), x
3043   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3044     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3045                               DAG.getAllOnesConstant(DL, VT));
3046     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3047   }
3048
3049   // Look for:
3050   //   sub y, (xor x, -1)
3051   // And if the target does not like this form then turn into:
3052   //   add (add x, y), 1
3053   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3054     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3055     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3056   }
3057
3058   // Hoist one-use addition by non-opaque constant:
3059   //   (x + C) - y  ->  (x - y) + C
3060   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3061       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3062     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3063     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3064   }
3065   // y - (x + C)  ->  (y - x) - C
3066   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3067       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3068     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3069     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3070   }
3071   // (x - C) - y  ->  (x - y) - C
3072   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3073   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3074       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3075     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3076     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3077   }
3078   // (C - x) - y  ->  C - (x + y)
3079   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3080       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3081     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3082     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3083   }
3084
3085   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3086   // rather than 'sub 0/1' (the sext should get folded).
3087   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3088   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3089       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3090       TLI.getBooleanContents(VT) ==
3091           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3092     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3093     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3094   }
3095
3096   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3097   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3098     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3099       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3100       SDValue S0 = N1.getOperand(0);
3101       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3102         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3103         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3104           if (C->getAPIntValue() == (OpSizeInBits - 1))
3105             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3106       }
3107     }
3108   }
3109
3110   // If the relocation model supports it, consider symbol offsets.
3111   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3112     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3113       // fold (sub Sym, c) -> Sym-c
3114       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3115         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3116                                     GA->getOffset() -
3117                                         (uint64_t)N1C->getSExtValue());
3118       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3119       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3120         if (GA->getGlobal() == GB->getGlobal())
3121           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3122                                  DL, VT);
3123     }
3124
3125   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3126   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3127     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3128     if (TN->getVT() == MVT::i1) {
3129       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3130                                  DAG.getConstant(1, DL, VT));
3131       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3132     }
3133   }
3134
3135   // Prefer an add for more folding potential and possibly better codegen:
3136   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3137   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3138     SDValue ShAmt = N1.getOperand(1);
3139     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3140     if (ShAmtC &&
3141         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3142       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3143       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3144     }
3145   }
3146
3147   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
3148     // (sub Carry, X)  ->  (addcarry (sub 0, X), 0, Carry)
3149     if (SDValue Carry = getAsCarry(TLI, N0)) {
3150       SDValue X = N1;
3151       SDValue Zero = DAG.getConstant(0, DL, VT);
3152       SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
3153       return DAG.getNode(ISD::ADDCARRY, DL,
3154                          DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
3155                          Carry);
3156     }
3157   }
3158
3159   return SDValue();
3160 }
3161
3162 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3163   SDValue N0 = N->getOperand(0);
3164   SDValue N1 = N->getOperand(1);
3165   EVT VT = N0.getValueType();
3166   SDLoc DL(N);
3167
3168   // fold vector ops
3169   if (VT.isVector()) {
3170     // TODO SimplifyVBinOp
3171
3172     // fold (sub_sat x, 0) -> x, vector edition
3173     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3174       return N0;
3175   }
3176
3177   // fold (sub_sat x, undef) -> 0
3178   if (N0.isUndef() || N1.isUndef())
3179     return DAG.getConstant(0, DL, VT);
3180
3181   // fold (sub_sat x, x) -> 0
3182   if (N0 == N1)
3183     return DAG.getConstant(0, DL, VT);
3184
3185   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3186       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3187     // fold (sub_sat c1, c2) -> c3
3188     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3189                                       N1.getNode());
3190   }
3191
3192   // fold (sub_sat x, 0) -> x
3193   if (isNullConstant(N1))
3194     return N0;
3195
3196   return SDValue();
3197 }
3198
3199 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3200   SDValue N0 = N->getOperand(0);
3201   SDValue N1 = N->getOperand(1);
3202   EVT VT = N0.getValueType();
3203   SDLoc DL(N);
3204
3205   // If the flag result is dead, turn this into an SUB.
3206   if (!N->hasAnyUseOfValue(1))
3207     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3208                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3209
3210   // fold (subc x, x) -> 0 + no borrow
3211   if (N0 == N1)
3212     return CombineTo(N, DAG.getConstant(0, DL, VT),
3213                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3214
3215   // fold (subc x, 0) -> x + no borrow
3216   if (isNullConstant(N1))
3217     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3218
3219   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3220   if (isAllOnesConstant(N0))
3221     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3222                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3223
3224   return SDValue();
3225 }
3226
3227 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3228   SDValue N0 = N->getOperand(0);
3229   SDValue N1 = N->getOperand(1);
3230   EVT VT = N0.getValueType();
3231   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3232
3233   EVT CarryVT = N->getValueType(1);
3234   SDLoc DL(N);
3235
3236   // If the flag result is dead, turn this into an SUB.
3237   if (!N->hasAnyUseOfValue(1))
3238     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3239                      DAG.getUNDEF(CarryVT));
3240
3241   // fold (subo x, x) -> 0 + no borrow
3242   if (N0 == N1)
3243     return CombineTo(N, DAG.getConstant(0, DL, VT),
3244                      DAG.getConstant(0, DL, CarryVT));
3245
3246   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3247
3248   // fold (subox, c) -> (addo x, -c)
3249   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3250     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3251                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3252   }
3253
3254   // fold (subo x, 0) -> x + no borrow
3255   if (isNullOrNullSplat(N1))
3256     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3257
3258   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3259   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3260     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3261                      DAG.getConstant(0, DL, CarryVT));
3262
3263   return SDValue();
3264 }
3265
3266 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3267   SDValue N0 = N->getOperand(0);
3268   SDValue N1 = N->getOperand(1);
3269   SDValue CarryIn = N->getOperand(2);
3270
3271   // fold (sube x, y, false) -> (subc x, y)
3272   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3273     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3274
3275   return SDValue();
3276 }
3277
3278 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3279   SDValue N0 = N->getOperand(0);
3280   SDValue N1 = N->getOperand(1);
3281   SDValue CarryIn = N->getOperand(2);
3282
3283   // fold (subcarry x, y, false) -> (usubo x, y)
3284   if (isNullConstant(CarryIn)) {
3285     if (!LegalOperations ||
3286         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3287       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3288   }
3289
3290   return SDValue();
3291 }
3292
3293 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
3294 // UMULFIXSAT here.
3295 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3296   SDValue N0 = N->getOperand(0);
3297   SDValue N1 = N->getOperand(1);
3298   SDValue Scale = N->getOperand(2);
3299   EVT VT = N0.getValueType();
3300
3301   // fold (mulfix x, undef, scale) -> 0
3302   if (N0.isUndef() || N1.isUndef())
3303     return DAG.getConstant(0, SDLoc(N), VT);
3304
3305   // Canonicalize constant to RHS (vector doesn't have to splat)
3306   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3307      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3308     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3309
3310   // fold (mulfix x, 0, scale) -> 0
3311   if (isNullConstant(N1))
3312     return DAG.getConstant(0, SDLoc(N), VT);
3313
3314   return SDValue();
3315 }
3316
3317 SDValue DAGCombiner::visitMUL(SDNode *N) {
3318   SDValue N0 = N->getOperand(0);
3319   SDValue N1 = N->getOperand(1);
3320   EVT VT = N0.getValueType();
3321
3322   // fold (mul x, undef) -> 0
3323   if (N0.isUndef() || N1.isUndef())
3324     return DAG.getConstant(0, SDLoc(N), VT);
3325
3326   bool N0IsConst = false;
3327   bool N1IsConst = false;
3328   bool N1IsOpaqueConst = false;
3329   bool N0IsOpaqueConst = false;
3330   APInt ConstValue0, ConstValue1;
3331   // fold vector ops
3332   if (VT.isVector()) {
3333     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3334       return FoldedVOp;
3335
3336     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3337     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3338     assert((!N0IsConst ||
3339             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3340            "Splat APInt should be element width");
3341     assert((!N1IsConst ||
3342             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3343            "Splat APInt should be element width");
3344   } else {
3345     N0IsConst = isa<ConstantSDNode>(N0);
3346     if (N0IsConst) {
3347       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3348       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3349     }
3350     N1IsConst = isa<ConstantSDNode>(N1);
3351     if (N1IsConst) {
3352       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3353       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3354     }
3355   }
3356
3357   // fold (mul c1, c2) -> c1*c2
3358   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3359     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3360                                       N0.getNode(), N1.getNode());
3361
3362   // canonicalize constant to RHS (vector doesn't have to splat)
3363   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3364      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3365     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3366   // fold (mul x, 0) -> 0
3367   if (N1IsConst && ConstValue1.isNullValue())
3368     return N1;
3369   // fold (mul x, 1) -> x
3370   if (N1IsConst && ConstValue1.isOneValue())
3371     return N0;
3372
3373   if (SDValue NewSel = foldBinOpIntoSelect(N))
3374     return NewSel;
3375
3376   // fold (mul x, -1) -> 0-x
3377   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3378     SDLoc DL(N);
3379     return DAG.getNode(ISD::SUB, DL, VT,
3380                        DAG.getConstant(0, DL, VT), N0);
3381   }
3382   // fold (mul x, (1 << c)) -> x << c
3383   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3384       DAG.isKnownToBeAPowerOfTwo(N1) &&
3385       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3386     SDLoc DL(N);
3387     SDValue LogBase2 = BuildLogBase2(N1, DL);
3388     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3389     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3390     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3391   }
3392   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3393   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3394     unsigned Log2Val = (-ConstValue1).logBase2();
3395     SDLoc DL(N);
3396     // FIXME: If the input is something that is easily negated (e.g. a
3397     // single-use add), we should put the negate there.
3398     return DAG.getNode(ISD::SUB, DL, VT,
3399                        DAG.getConstant(0, DL, VT),
3400                        DAG.getNode(ISD::SHL, DL, VT, N0,
3401                             DAG.getConstant(Log2Val, DL,
3402                                       getShiftAmountTy(N0.getValueType()))));
3403   }
3404
3405   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3406   // mul x, (2^N + 1) --> add (shl x, N), x
3407   // mul x, (2^N - 1) --> sub (shl x, N), x
3408   // Examples: x * 33 --> (x << 5) + x
3409   //           x * 15 --> (x << 4) - x
3410   //           x * -33 --> -((x << 5) + x)
3411   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3412   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3413     // TODO: We could handle more general decomposition of any constant by
3414     //       having the target set a limit on number of ops and making a
3415     //       callback to determine that sequence (similar to sqrt expansion).
3416     unsigned MathOp = ISD::DELETED_NODE;
3417     APInt MulC = ConstValue1.abs();
3418     if ((MulC - 1).isPowerOf2())
3419       MathOp = ISD::ADD;
3420     else if ((MulC + 1).isPowerOf2())
3421       MathOp = ISD::SUB;
3422
3423     if (MathOp != ISD::DELETED_NODE) {
3424       unsigned ShAmt =
3425           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3426       assert(ShAmt < VT.getScalarSizeInBits() &&
3427              "multiply-by-constant generated out of bounds shift");
3428       SDLoc DL(N);
3429       SDValue Shl =
3430           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3431       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3432       if (ConstValue1.isNegative())
3433         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3434       return R;
3435     }
3436   }
3437
3438   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3439   if (N0.getOpcode() == ISD::SHL &&
3440       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3441       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3442     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3443     if (isConstantOrConstantVector(C3))
3444       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3445   }
3446
3447   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3448   // use.
3449   {
3450     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3451
3452     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3453     if (N0.getOpcode() == ISD::SHL &&
3454         isConstantOrConstantVector(N0.getOperand(1)) &&
3455         N0.getNode()->hasOneUse()) {
3456       Sh = N0; Y = N1;
3457     } else if (N1.getOpcode() == ISD::SHL &&
3458                isConstantOrConstantVector(N1.getOperand(1)) &&
3459                N1.getNode()->hasOneUse()) {
3460       Sh = N1; Y = N0;
3461     }
3462
3463     if (Sh.getNode()) {
3464       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3465       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3466     }
3467   }
3468
3469   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3470   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3471       N0.getOpcode() == ISD::ADD &&
3472       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3473       isMulAddWithConstProfitable(N, N0, N1))
3474       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3475                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3476                                      N0.getOperand(0), N1),
3477                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3478                                      N0.getOperand(1), N1));
3479
3480   // reassociate mul
3481   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3482     return RMUL;
3483
3484   return SDValue();
3485 }
3486
3487 /// Return true if divmod libcall is available.
3488 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3489                                      const TargetLowering &TLI) {
3490   RTLIB::Libcall LC;
3491   EVT NodeType = Node->getValueType(0);
3492   if (!NodeType.isSimple())
3493     return false;
3494   switch (NodeType.getSimpleVT().SimpleTy) {
3495   default: return false; // No libcall for vector types.
3496   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3497   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3498   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3499   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3500   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3501   }
3502
3503   return TLI.getLibcallName(LC) != nullptr;
3504 }
3505
3506 /// Issue divrem if both quotient and remainder are needed.
3507 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3508   if (Node->use_empty())
3509     return SDValue(); // This is a dead node, leave it alone.
3510
3511   unsigned Opcode = Node->getOpcode();
3512   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3513   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3514
3515   // DivMod lib calls can still work on non-legal types if using lib-calls.
3516   EVT VT = Node->getValueType(0);
3517   if (VT.isVector() || !VT.isInteger())
3518     return SDValue();
3519
3520   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3521     return SDValue();
3522
3523   // If DIVREM is going to get expanded into a libcall,
3524   // but there is no libcall available, then don't combine.
3525   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3526       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3527     return SDValue();
3528
3529   // If div is legal, it's better to do the normal expansion
3530   unsigned OtherOpcode = 0;
3531   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3532     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3533     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3534       return SDValue();
3535   } else {
3536     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3537     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3538       return SDValue();
3539   }
3540
3541   SDValue Op0 = Node->getOperand(0);
3542   SDValue Op1 = Node->getOperand(1);
3543   SDValue combined;
3544   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3545          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3546     SDNode *User = *UI;
3547     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3548         User->use_empty())
3549       continue;
3550     // Convert the other matching node(s), too;
3551     // otherwise, the DIVREM may get target-legalized into something
3552     // target-specific that we won't be able to recognize.
3553     unsigned UserOpc = User->getOpcode();
3554     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3555         User->getOperand(0) == Op0 &&
3556         User->getOperand(1) == Op1) {
3557       if (!combined) {
3558         if (UserOpc == OtherOpcode) {
3559           SDVTList VTs = DAG.getVTList(VT, VT);
3560           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3561         } else if (UserOpc == DivRemOpc) {
3562           combined = SDValue(User, 0);
3563         } else {
3564           assert(UserOpc == Opcode);
3565           continue;
3566         }
3567       }
3568       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3569         CombineTo(User, combined);
3570       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3571         CombineTo(User, combined.getValue(1));
3572     }
3573   }
3574   return combined;
3575 }
3576
3577 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3578   SDValue N0 = N->getOperand(0);
3579   SDValue N1 = N->getOperand(1);
3580   EVT VT = N->getValueType(0);
3581   SDLoc DL(N);
3582
3583   unsigned Opc = N->getOpcode();
3584   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3585   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3586
3587   // X / undef -> undef
3588   // X % undef -> undef
3589   // X / 0 -> undef
3590   // X % 0 -> undef
3591   // NOTE: This includes vectors where any divisor element is zero/undef.
3592   if (DAG.isUndef(Opc, {N0, N1}))
3593     return DAG.getUNDEF(VT);
3594
3595   // undef / X -> 0
3596   // undef % X -> 0
3597   if (N0.isUndef())
3598     return DAG.getConstant(0, DL, VT);
3599
3600   // 0 / X -> 0
3601   // 0 % X -> 0
3602   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3603   if (N0C && N0C->isNullValue())
3604     return N0;
3605
3606   // X / X -> 1
3607   // X % X -> 0
3608   if (N0 == N1)
3609     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3610
3611   // X / 1 -> X
3612   // X % 1 -> 0
3613   // If this is a boolean op (single-bit element type), we can't have
3614   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3615   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3616   // it's a 1.
3617   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3618     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3619
3620   return SDValue();
3621 }
3622
3623 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3624   SDValue N0 = N->getOperand(0);
3625   SDValue N1 = N->getOperand(1);
3626   EVT VT = N->getValueType(0);
3627   EVT CCVT = getSetCCResultType(VT);
3628
3629   // fold vector ops
3630   if (VT.isVector())
3631     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3632       return FoldedVOp;
3633
3634   SDLoc DL(N);
3635
3636   // fold (sdiv c1, c2) -> c1/c2
3637   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3638   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3639   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3640     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3641   // fold (sdiv X, -1) -> 0-X
3642   if (N1C && N1C->isAllOnesValue())
3643     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3644   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3645   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3646     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3647                          DAG.getConstant(1, DL, VT),
3648                          DAG.getConstant(0, DL, VT));
3649
3650   if (SDValue V = simplifyDivRem(N, DAG))
3651     return V;
3652
3653   if (SDValue NewSel = foldBinOpIntoSelect(N))
3654     return NewSel;
3655
3656   // If we know the sign bits of both operands are zero, strength reduce to a
3657   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3658   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3659     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3660
3661   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3662     // If the corresponding remainder node exists, update its users with
3663     // (Dividend - (Quotient * Divisor).
3664     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3665                                               { N0, N1 })) {
3666       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3667       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3668       AddToWorklist(Mul.getNode());
3669       AddToWorklist(Sub.getNode());
3670       CombineTo(RemNode, Sub);
3671     }
3672     return V;
3673   }
3674
3675   // sdiv, srem -> sdivrem
3676   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3677   // true.  Otherwise, we break the simplification logic in visitREM().
3678   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3679   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3680     if (SDValue DivRem = useDivRem(N))
3681         return DivRem;
3682
3683   return SDValue();
3684 }
3685
3686 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3687   SDLoc DL(N);
3688   EVT VT = N->getValueType(0);
3689   EVT CCVT = getSetCCResultType(VT);
3690   unsigned BitWidth = VT.getScalarSizeInBits();
3691
3692   // Helper for determining whether a value is a power-2 constant scalar or a
3693   // vector of such elements.
3694   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3695     if (C->isNullValue() || C->isOpaque())
3696       return false;
3697     if (C->getAPIntValue().isPowerOf2())
3698       return true;
3699     if ((-C->getAPIntValue()).isPowerOf2())
3700       return true;
3701     return false;
3702   };
3703
3704   // fold (sdiv X, pow2) -> simple ops after legalize
3705   // FIXME: We check for the exact bit here because the generic lowering gives
3706   // better results in that case. The target-specific lowering should learn how
3707   // to handle exact sdivs efficiently.
3708   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3709     // Target-specific implementation of sdiv x, pow2.
3710     if (SDValue Res = BuildSDIVPow2(N))
3711       return Res;
3712
3713     // Create constants that are functions of the shift amount value.
3714     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3715     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3716     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3717     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3718     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3719     if (!isConstantOrConstantVector(Inexact))
3720       return SDValue();
3721
3722     // Splat the sign bit into the register
3723     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3724                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3725     AddToWorklist(Sign.getNode());
3726
3727     // Add (N0 < 0) ? abs2 - 1 : 0;
3728     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3729     AddToWorklist(Srl.getNode());
3730     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3731     AddToWorklist(Add.getNode());
3732     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3733     AddToWorklist(Sra.getNode());
3734
3735     // Special case: (sdiv X, 1) -> X
3736     // Special Case: (sdiv X, -1) -> 0-X
3737     SDValue One = DAG.getConstant(1, DL, VT);
3738     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3739     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3740     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3741     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3742     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3743
3744     // If dividing by a positive value, we're done. Otherwise, the result must
3745     // be negated.
3746     SDValue Zero = DAG.getConstant(0, DL, VT);
3747     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3748
3749     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3750     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3751     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3752     return Res;
3753   }
3754
3755   // If integer divide is expensive and we satisfy the requirements, emit an
3756   // alternate sequence.  Targets may check function attributes for size/speed
3757   // trade-offs.
3758   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3759   if (isConstantOrConstantVector(N1) &&
3760       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3761     if (SDValue Op = BuildSDIV(N))
3762       return Op;
3763
3764   return SDValue();
3765 }
3766
3767 SDValue DAGCombiner::visitUDIV(SDNode *N) {
       // Combine an ISD::UDIV node. The folds below are tried in order and the
       // first one that yields a value is returned; an empty SDValue means that
       // no combine applied.
3768   SDValue N0 = N->getOperand(0);
3769   SDValue N1 = N->getOperand(1);
3770   EVT VT = N->getValueType(0);
3771   EVT CCVT = getSetCCResultType(VT);
3772
3773   // fold vector ops
3774   if (VT.isVector())
3775     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3776       return FoldedVOp;
3777
3778   SDLoc DL(N);
3779
3780   // fold (udiv c1, c2) -> c1/c2
       // N0C/N1C are non-null only for a constant or a constant splat.
3781   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3782   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3783   if (N0C && N1C)
3784     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
3785                                                     N0C, N1C))
3786       return Folded;
3787   // fold (udiv X, -1) -> select(X == -1, 1, 0)
       // Dividing by the all-ones value gives 1 only when X is all-ones as well.
3788   if (N1C && N1C->getAPIntValue().isAllOnesValue())
3789     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3790                          DAG.getConstant(1, DL, VT),
3791                          DAG.getConstant(0, DL, VT));
3792
       // Simplifications shared with visitREM(), which calls the same helper.
3793   if (SDValue V = simplifyDivRem(N, DAG))
3794     return V;
3795
3796   if (SDValue NewSel = foldBinOpIntoSelect(N))
3797     return NewSel;
3798
3799   if (SDValue V = visitUDIVLike(N0, N1, N)) {
3800     // If the corresponding remainder node exists, update its users with
3801     // (Dividend - (Quotient * Divisor)).
3802     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
3803                                               { N0, N1 })) {
3804       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3805       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3806       AddToWorklist(Mul.getNode());
3807       AddToWorklist(Sub.getNode());
3808       CombineTo(RemNode, Sub);
3809     }
3810     return V;
3811   }
3812
3813   // udiv, urem -> udivrem
3814   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3815   // true.  Otherwise, we break the simplification logic in visitREM().
3816   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3817   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3818     if (SDValue DivRem = useDivRem(N))
3819         return DivRem;
3820
3821   return SDValue();
3822 }
3823
3824 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
       // Shared unsigned-division lowering for dividend N0 and divisor N1. N only
       // supplies the debug location, the result type and the node handed to
       // BuildUDIV(); it is not necessarily a UDIV node, because visitREM() calls
       // this with a UREM node. Returns an empty SDValue if no fold applies.
3825   SDLoc DL(N);
3826   EVT VT = N->getValueType(0);
3827
3828   // fold (udiv x, (1 << c)) -> x >>u c
3829   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3830       DAG.isKnownToBeAPowerOfTwo(N1)) {
3831     SDValue LogBase2 = BuildLogBase2(N1, DL);
3832     AddToWorklist(LogBase2.getNode());
3833
         // The log2 value is converted to the target's shift-amount type for N0.
3834     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3835     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3836     AddToWorklist(Trunc.getNode());
3837     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
3838   }
3839
3840   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
3841   if (N1.getOpcode() == ISD::SHL) {
3842     SDValue N10 = N1.getOperand(0);
3843     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
3844         DAG.isKnownToBeAPowerOfTwo(N10)) {
3845       SDValue LogBase2 = BuildLogBase2(N10, DL);
3846       AddToWorklist(LogBase2.getNode());
3847
           // The addition is done in the type of the existing shift amount y, so
           // log2(c) is first converted to that type.
3848       EVT ADDVT = N1.getOperand(1).getValueType();
3849       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
3850       AddToWorklist(Trunc.getNode());
3851       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
3852       AddToWorklist(Add.getNode());
3853       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
3854     }
3855   }
3856
3857   // fold (udiv x, c) -> alternate
       // Only attempted for a constant divisor when the target reports that
       // integer division is not cheap for this type and these attributes.
3858   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3859   if (isConstantOrConstantVector(N1) &&
3860       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3861     if (SDValue Op = BuildUDIV(N))
3862       return Op;
3863
3864   return SDValue();
3865 }
3866
3867 // Handles ISD::SREM and ISD::UREM; isSigned below selects between the two.
     // Returns an empty SDValue when no combine applies.
3868 SDValue DAGCombiner::visitREM(SDNode *N) {
3869   unsigned Opcode = N->getOpcode();
3870   SDValue N0 = N->getOperand(0);
3871   SDValue N1 = N->getOperand(1);
3872   EVT VT = N->getValueType(0);
3873   EVT CCVT = getSetCCResultType(VT);
3874
3875   bool isSigned = (Opcode == ISD::SREM);
3876   SDLoc DL(N);
3877
3878   // fold (rem c1, c2) -> c1%c2
3879   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3880   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3881   if (N0C && N1C)
3882     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
3883       return Folded;
3884   // fold (urem X, -1) -> select(X == -1, 0, X)
3885   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
3886     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3887                          DAG.getConstant(0, DL, VT), N0);
3888
3889   if (SDValue V = simplifyDivRem(N, DAG))
3890     return V;
3891
3892   if (SDValue NewSel = foldBinOpIntoSelect(N))
3893     return NewSel;
3894
3895   if (isSigned) {
3896     // If we know the sign bits of both operands are zero, strength reduce to a
3897     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
3898     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3899       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
3900   } else {
         // Both unsigned folds below build the mask as (divisor + (-1)).
3901     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
3902     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
3903       // fold (urem x, pow2) -> (and x, pow2-1)
3904       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3905       AddToWorklist(Add.getNode());
3906       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3907     }
3908     if (N1.getOpcode() == ISD::SHL &&
3909         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
3910       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
3911       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
3912       AddToWorklist(Add.getNode());
3913       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
3914     }
3915   }
3916
3917   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3918
3919   // If X/C can be simplified by the division-by-constant logic, lower
3920   // X%C to the equivalent of X-X/C*C.
3921   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
3922   // speculative DIV must not cause a DIVREM conversion.  We guard against this
3923   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
3924   // combine will not return a DIVREM.  Regardless, checking cheapness here
3925   // makes sense since the simplification results in fatter code.
3926   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
         // Note that the REM node itself (N) is passed to the DIV-like helpers.
3927     SDValue OptimizedDiv =
3928         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
3929     if (OptimizedDiv.getNode()) {
3930       // If the equivalent Div node also exists, update its users.
3931       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3932       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
3933                                                 { N0, N1 }))
3934         CombineTo(DivNode, OptimizedDiv);
           // Remainder = N0 - (N0 / N1) * N1, using the optimized quotient.
3935       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
3936       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3937       AddToWorklist(OptimizedDiv.getNode());
3938       AddToWorklist(Mul.getNode());
3939       return Sub;
3940     }
3941   }
3942
3943   // sdiv, srem -> sdivrem (N may equally be a UREM here). The remainder is
       // taken from result value 1 of the node returned by useDivRem().
3944   if (SDValue DivRem = useDivRem(N))
3945     return DivRem.getValue(1);
3946
3947   return SDValue();
3948 }
3949
3950 SDValue DAGCombiner::visitMULHS(SDNode *N) {
       // Combine a MULHS node (the folds below treat it as the signed high half
       // of a multiply). Returns an empty SDValue when no fold applies.
3951   SDValue N0 = N->getOperand(0);
3952   SDValue N1 = N->getOperand(1);
3953   EVT VT = N->getValueType(0);
3954   SDLoc DL(N);
3955
3956   if (VT.isVector()) {
3957     // fold (mulhs x, 0) -> 0
3958     // do not return N0/N1, because undef node may exist.
3959     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
3960         ISD::isBuildVectorAllZeros(N1.getNode()))
3961       return DAG.getConstant(0, DL, VT);
3962   }
3963
3964   // fold (mulhs x, 0) -> 0
3965   if (isNullConstant(N1))
3966     return N1;
3967   // fold (mulhs x, 1) -> (sra x, size(x)-1)
       // i.e. the high half of x * 1 is x's sign bit replicated across the value.
3968   if (isOneConstant(N1))
3969     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
3970                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
3971                                        getShiftAmountTy(N0.getValueType())));
3972
3973   // fold (mulhs x, undef) -> 0
3974   if (N0.isUndef() || N1.isUndef())
3975     return DAG.getConstant(0, DL, VT);
3976
3977   // If a multiply on the type twice as wide is legal, transform the mulhs to
3978   // a wider multiply plus a shift. Scalar simple types only.
3979   if (VT.isSimple() && !VT.isVector()) {
3980     MVT Simple = VT.getSimpleVT();
3981     unsigned SimpleSize = Simple.getSizeInBits();
3982     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
3983     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
           // N0/N1 are reused as temporaries for the widened values. A logical
           // shift suffices: the bits it shifts in are dropped by the truncate.
3984       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
3985       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
3986       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
3987       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
3988             DAG.getConstant(SimpleSize, DL,
3989                             getShiftAmountTy(N1.getValueType())));
3990       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
3991     }
3992   }
3993
3994   return SDValue();
3995 }
3996
3997 SDValue DAGCombiner::visitMULHU(SDNode *N) {
       // Combine a MULHU node (the folds below treat it as the unsigned high half
       // of a multiply). Returns an empty SDValue when no fold applies.
3998   SDValue N0 = N->getOperand(0);
3999   SDValue N1 = N->getOperand(1);
4000   EVT VT = N->getValueType(0);
4001   SDLoc DL(N);
4002
4003   if (VT.isVector()) {
4004     // fold (mulhu x, 0) -> 0
4005     // do not return N0/N1, because undef node may exist.
4006     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4007         ISD::isBuildVectorAllZeros(N1.getNode()))
4008       return DAG.getConstant(0, DL, VT);
4009   }
4010
4011   // fold (mulhu x, 0) -> 0
4012   if (isNullConstant(N1))
4013     return N1;
4014   // fold (mulhu x, 1) -> 0
4015   if (isOneConstant(N1))
4016     return DAG.getConstant(0, DL, N0.getValueType());
4017   // fold (mulhu x, undef) -> 0
4018   if (N0.isUndef() || N1.isUndef())
4019     return DAG.getConstant(0, DL, VT);
4020
4021   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
       // The shift amount is computed in VT as (bitwidth - log2(N1)) and then
       // converted to the target's shift-amount type.
4022   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4023       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4024     unsigned NumEltBits = VT.getScalarSizeInBits();
4025     SDValue LogBase2 = BuildLogBase2(N1, DL);
4026     SDValue SRLAmt = DAG.getNode(
4027         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4028     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4029     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4030     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4031   }
4032
4033   // If a multiply on the type twice as wide is legal, transform the mulhu to
4034   // a wider multiply plus a shift. Scalar simple types only.
4035   if (VT.isSimple() && !VT.isVector()) {
4036     MVT Simple = VT.getSimpleVT();
4037     unsigned SimpleSize = Simple.getSizeInBits();
4038     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4039     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
           // N0/N1 are reused as temporaries for the zero-extended values; the
           // truncate of the shifted product keeps the original high half.
4040       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4041       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4042       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4043       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4044             DAG.getConstant(SimpleSize, DL,
4045                             getShiftAmountTy(N1.getValueType())));
4046       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4047     }
4048   }
4049
4050   return SDValue();
4051 }
4052
4053 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4054 /// give the opcodes for the two computations that are being performed. Returns
4055 /// the value produced by CombineTo() if a simplification was made, or an empty
     /// SDValue if the node was left unchanged.
4056 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4057                                                 unsigned HiOp) {
4058   // If the high half is not needed, just compute the low half.
       // Result 1 has no uses in this case, so Res is passed for both results.
4059   bool HiExists = N->hasAnyUseOfValue(1);
4060   if (!HiExists && (!LegalOperations ||
4061                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4062     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4063     return CombineTo(N, Res, Res);
4064   }
4065
4066   // If the low half is not needed, just compute the high half.
       // Result 0 has no uses in this case, so Res is passed for both results.
4067   bool LoExists = N->hasAnyUseOfValue(0);
4068   if (!LoExists && (!LegalOperations ||
4069                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4070     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4071     return CombineTo(N, Res, Res);
4072   }
4073
4074   // If both halves are used, return as it is.
4075   if (LoExists && HiExists)
4076     return SDValue();
4077
4078   // If the two computed results can be simplified separately, separate them.
       // At most one half is used past this point, and the direct replacement for
       // that half was rejected above by the legality check. Build the single-
       // result node anyway, run combine() on it, and accept the outcome only if
       // it is a different node whose opcode passes the legality check.
4079   if (LoExists) {
4080     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4081     AddToWorklist(Lo.getNode());
4082     SDValue LoOpt = combine(Lo.getNode());
4083     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4084         (!LegalOperations ||
4085          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4086       return CombineTo(N, LoOpt, LoOpt);
4087   }
4088
4089   if (HiExists) {
4090     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4091     AddToWorklist(Hi.getNode());
4092     SDValue HiOpt = combine(Hi.getNode());
         // Note: this compares SDValues, whereas the Lo path compares node
         // pointers.
4093     if (HiOpt.getNode() && HiOpt != Hi &&
4094         (!LegalOperations ||
4095          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4096       return CombineTo(N, HiOpt, HiOpt);
4097   }
4098
4099   return SDValue();
4100 }
4101
4102 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
       // Combine an SMUL_LOHI node. First try the generic two-result
       // simplification with MUL as the low op and MULHS as the high op.
4103   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4104     return Res;
4105
4106   EVT VT = N->getValueType(0);
4107   SDLoc DL(N);
4108
4109   // If a multiply on the type twice as wide is legal, transform the smul_lohi
4110   // to a wider multiply plus a shift.
4111   if (VT.isSimple() && !VT.isVector()) {
4112     MVT Simple = VT.getSimpleVT();
4113     unsigned SimpleSize = Simple.getSizeInBits();
4114     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4115     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
           // Lo and Hi first hold the two sign-extended operands; Lo then becomes
           // the full-width product.
4116       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4117       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4118       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4119       // Compute the high part (result value 1).
4120       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4121             DAG.getConstant(SimpleSize, DL,
4122                             getShiftAmountTy(Lo.getValueType())));
4123       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4124       // Compute the low part (result value 0).
4125       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4126       return CombineTo(N, Lo, Hi);
4127     }
4128   }
4129
4130   return SDValue();
4131 }
4132
4133 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
       // Combine a UMUL_LOHI node. First try the generic two-result
       // simplification with MUL as the low op and MULHU as the high op.
4134   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4135     return Res;
4136
4137   EVT VT = N->getValueType(0);
4138   SDLoc DL(N);
4139
4140   // (umul_lohi N0, 0) -> (0, 0)
4141   if (isNullConstant(N->getOperand(1))) {
4142     SDValue Zero = DAG.getConstant(0, DL, VT);
4143     return CombineTo(N, Zero, Zero);
4144   }
4145
4146   // (umul_lohi N0, 1) -> (N0, 0)
4147   if (isOneConstant(N->getOperand(1))) {
4148     SDValue Zero = DAG.getConstant(0, DL, VT);
4149     return CombineTo(N, N->getOperand(0), Zero);
4150   }
4151
4152   // If a multiply on the type twice as wide is legal, transform the umul_lohi
4153   // to a wider multiply plus a shift.
4154   if (VT.isSimple() && !VT.isVector()) {
4155     MVT Simple = VT.getSimpleVT();
4156     unsigned SimpleSize = Simple.getSizeInBits();
4157     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4158     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
           // Lo and Hi first hold the two zero-extended operands; Lo then becomes
           // the full-width product.
4159       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4160       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4161       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4162       // Compute the high part (result value 1).
4163       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4164             DAG.getConstant(SimpleSize, DL,
4165                             getShiftAmountTy(Lo.getValueType())));
4166       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4167       // Compute the low part (result value 0).
4168       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4169       return CombineTo(N, Lo, Hi);
4170     }
4171   }
4172
4173   return SDValue();
4174 }
4175
4176 SDValue DAGCombiner::visitMULO(SDNode *N) {
       // Combine a multiply-with-overflow node. IsSigned is true for ISD::SMULO;
       // any other opcode reaching here is treated as the unsigned form
       // (presumably ISD::UMULO -- confirm against the dispatcher).
4177   SDValue N0 = N->getOperand(0);
4178   SDValue N1 = N->getOperand(1);
4179   EVT VT = N0.getValueType();
4180   bool IsSigned = (ISD::SMULO == N->getOpcode());
4181
       // Result value 1 is the overflow flag; CarryVT is its type.
4182   EVT CarryVT = N->getValueType(1);
4183   SDLoc DL(N);
4184
4185   // canonicalize constant to RHS.
4186   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4187       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4188     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
4189
4190   // fold (mulo x, 0) -> 0 + no carry out
4191   if (isNullOrNullSplat(N1))
4192     return CombineTo(N, DAG.getConstant(0, DL, VT),
4193                      DAG.getConstant(0, DL, CarryVT));
4194
4195   // (mulo x, 2) -> (addo x, x)
       // The signedness of the add-with-overflow node matches the multiply.
4196   if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
4197     if (C2->getAPIntValue() == 2)
4198       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
4199                          N->getVTList(), N0, N0);
4200
4201   return SDValue();
4202 }
4203
4204 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
       // Combine an integer min/max node (SMIN, SMAX, UMIN or UMAX, per the switch
       // below). Returns an empty SDValue when no fold applies.
4205   SDValue N0 = N->getOperand(0);
4206   SDValue N1 = N->getOperand(1);
4207   EVT VT = N0.getValueType();
4208
4209   // fold vector ops
4210   if (VT.isVector())
4211     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4212       return FoldedVOp;
4213
4214   // fold operation with constant operands.
       // The result of FoldConstantArithmetic is returned as-is, even if empty.
4215   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4216   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4217   if (N0C && N1C)
4218     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4219
4220   // canonicalize constant to RHS
4221   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4222      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4223     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4224
4225   // If the sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4226   // Only do this if the current op isn't legal and the flipped is.
       // Undef operands are accepted in place of a known-zero sign bit.
4227   unsigned Opcode = N->getOpcode();
       // Note: this local TLI hides the TLI that the other DAGCombiner methods in
       // this file use without declaring it.
4228   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4229   if (!TLI.isOperationLegal(Opcode, VT) &&
4230       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4231       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4232     unsigned AltOpcode;
4233     switch (Opcode) {
4234     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4235     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4236     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4237     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4238     default: llvm_unreachable("Unknown MINMAX opcode");
4239     }
4240     if (TLI.isOperationLegal(AltOpcode, VT))
4241       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4242   }
4243
4244   return SDValue();
4245 }
4246
4247 /// If this is a bitwise logic instruction and both operands have the same
4248 /// opcode, try to sink the other opcode after the logic instruction.
     /// N must be an AND, OR or XOR whose two operands ("hands") share an opcode;
     /// both conditions are asserted. Returns the replacement value, or an empty
     /// SDValue if no transform applies.
4249 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4250   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4251   EVT VT = N0.getValueType();
4252   unsigned LogicOpcode = N->getOpcode();
4253   unsigned HandOpcode = N0.getOpcode();
4254   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4255           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4256   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4257
4258   // Bail early if none of these transforms apply.
       // Everything below reads operand 0 of each hand, so leaf nodes are skipped.
4259   if (N0.getNumOperands() == 0)
4260     return SDValue();
4261
4262   // FIXME: We should check number of uses of the operands to not increase
4263   //        the instruction count for all transforms.
4264
4265   // Handle size-changing casts.
       // X and Y are the first operands of the two hands.
4266   SDValue X = N0.getOperand(0);
4267   SDValue Y = N1.getOperand(0);
4268   EVT XVT = X.getValueType();
4269   SDLoc DL(N);
4270   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4271       HandOpcode == ISD::SIGN_EXTEND) {
4272     // If both operands have other uses, this transform would create extra
4273     // instructions without eliminating anything.
4274     if (!N0.hasOneUse() && !N1.hasOneUse())
4275       return SDValue();
4276     // We need matching integer source types.
4277     if (XVT != Y.getValueType())
4278       return SDValue();
4279     // Don't create an illegal op during or after legalization. Don't ever
4280     // create an unsupported vector op.
4281     if ((VT.isVector() || LegalOperations) &&
4282         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4283       return SDValue();
4284     // Avoid infinite looping with PromoteIntBinOp.
4285     // TODO: Should we apply desirable/legal constraints to all opcodes?
4286     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4287         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4288       return SDValue();
4289     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4290     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4291     return DAG.getNode(HandOpcode, DL, VT, Logic);
4292   }
4293
4294   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4295   if (HandOpcode == ISD::TRUNCATE) {
4296     // If both operands have other uses, this transform would create extra
4297     // instructions without eliminating anything.
4298     if (!N0.hasOneUse() && !N1.hasOneUse())
4299       return SDValue();
4300     // We need matching source types.
4301     if (XVT != Y.getValueType())
4302       return SDValue();
4303     // Don't create an illegal op during or after legalization.
4304     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4305       return SDValue();
4306     // Be extra careful sinking truncate. If it's free, there's no benefit in
4307     // widening a binop. Also, don't create a logic op on an illegal type.
4308     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4309       return SDValue();
4310     if (!TLI.isTypeLegal(XVT))
4311       return SDValue();
4312     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4313     return DAG.getNode(HandOpcode, DL, VT, Logic);
4314   }
4315
4316   // For binops SHL/SRL/SRA/AND:
4317   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
       // Requires both hands to share the same second operand z.
4318   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4319        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4320       N0.getOperand(1) == N1.getOperand(1)) {
4321     // If either operand has other uses, this transform is not an improvement.
4322     if (!N0.hasOneUse() || !N1.hasOneUse())
4323       return SDValue();
4324     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4325     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4326   }
4327
4328   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4329   if (HandOpcode == ISD::BSWAP) {
4330     // If either operand has other uses, this transform is not an improvement.
4331     if (!N0.hasOneUse() || !N1.hasOneUse())
4332       return SDValue();
4333     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4334     return DAG.getNode(HandOpcode, DL, VT, Logic);
4335   }
4336
4337   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4338   // Only perform this optimization up until type legalization, before
4339   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4340   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4341   // we don't want to undo this promotion.
4342   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4343   // on scalars.
4344   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4345        Level <= AfterLegalizeTypes) {
4346     // Input types must be integer and the same.
         // Additionally skip the case where the result is a legal vector type but
         // the source is an illegal scalar type.
4347     if (XVT.isInteger() && XVT == Y.getValueType() &&
4348         !(VT.isVector() && TLI.isTypeLegal(VT) &&
4349           !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
4350       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4351       return DAG.getNode(HandOpcode, DL, VT, Logic);
4352     }
4353   }
4354
4355   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4356   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4357   // If both shuffles use the same mask, and both shuffle within a single
4358   // vector, then it is worthwhile to move the swizzle after the operation.
4359   // The type-legalizer generates this pattern when loading illegal
4360   // vector types from memory. In many cases this allows additional shuffle
4361   // optimizations.
4362   // There are other cases where moving the shuffle after the xor/and/or
4363   // is profitable even if shuffles don't perform a swizzle.
4364   // If both shuffles use the same mask, and both shuffles have the same first
4365   // or second operand, then it might still be profitable to move the shuffle
4366   // after the xor/and/or operation.
4367   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4368     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4369     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4370     assert(X.getValueType() == Y.getValueType() &&
4371            "Inputs to shuffles are not the same type");
4372
4373     // Check that both shuffles use the same mask. The masks are known to be of
4374     // the same length because the result vector type is the same.
4375     // Check also that shuffles have only one use to avoid introducing extra
4376     // instructions.
4377     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4378         !SVN0->getMask().equals(SVN1->getMask()))
4379       return SDValue();
4380
4381     // Don't try to fold this node if it requires introducing a
4382     // build vector of all zeros that might be illegal at this stage.
         // For XOR with a non-undef shared operand, ShOp is replaced by the result
         // of tryFoldToZero(); if that is empty, the ShOp.getNode() test below
         // disables the fold.
4383     SDValue ShOp = N0.getOperand(1);
4384     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4385       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4386
4387     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4388     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4389       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4390                                   N0.getOperand(0), N1.getOperand(0));
4391       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4392     }
4393
4394     // Don't try to fold this node if it requires introducing a
4395     // build vector of all zeros that might be illegal at this stage.
         // Same handling as above, now for a shared first operand.
4396     ShOp = N0.getOperand(0);
4397     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4398       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4399
4400     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4401     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4402       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4403                                   N1.getOperand(1));
4404       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4405     }
4406   }
4407
4408   return SDValue();
4409 }
4410
4411 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4412 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4413                                        const SDLoc &DL) {
4414   SDValue LL, LR, RL, RR, N0CC, N1CC;
4415   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4416       !isSetCCEquivalent(N1, RL, RR, N1CC))
4417     return SDValue();
4418
4419   assert(N0.getValueType() == N1.getValueType() &&
4420          "Unexpected operand types for bitwise logic op");
4421   assert(LL.getValueType() == LR.getValueType() &&
4422          RL.getValueType() == RR.getValueType() &&
4423          "Unexpected operand types for setcc");
4424
4425   // If we're here post-legalization or the logic op type is not i1, the logic
4426   // op type must match a setcc result type. Also, all folds require new
4427   // operations on the left and right operands, so those types must match.
4428   EVT VT = N0.getValueType();
4429   EVT OpVT = LL.getValueType();
4430   if (LegalOperations || VT.getScalarType() != MVT::i1)
4431     if (VT != getSetCCResultType(OpVT))
4432       return SDValue();
4433   if (OpVT != RL.getValueType())
4434     return SDValue();
4435
4436   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4437   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4438   bool IsInteger = OpVT.isInteger();
4439   if (LR == RR && CC0 == CC1 && IsInteger) {
4440     bool IsZero = isNullOrNullSplat(LR);
4441     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4442
4443     // All bits clear?
4444     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4445     // All sign bits clear?
4446     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4447     // Any bits set?
4448     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4449     // Any sign bits set?
4450     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4451
4452     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4453     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4454     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4455     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4456     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4457       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4458       AddToWorklist(Or.getNode());
4459       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4460     }
4461
4462     // All bits set?
4463     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4464     // All sign bits set?
4465     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4466     // Any bits clear?
4467     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4468     // Any sign bits clear?
4469     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4470
4471     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4472     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4473     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4474     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4475     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4476       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4477       AddToWorklist(And.getNode());
4478       return DAG.getSetCC(DL, VT, And, LR, CC1);
4479     }
4480   }
4481
4482   // TODO: What is the 'or' equivalent of this fold?
4483   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4484   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4485       IsInteger && CC0 == ISD::SETNE &&
4486       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4487        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4488     SDValue One = DAG.getConstant(1, DL, OpVT);
4489     SDValue Two = DAG.getConstant(2, DL, OpVT);
4490     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4491     AddToWorklist(Add.getNode());
4492     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4493   }
4494
4495   // Try more general transforms if the predicates match and the only user of
4496   // the compares is the 'and' or 'or'.
4497   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4498       N0.hasOneUse() && N1.hasOneUse()) {
4499     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4500     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4501     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4502       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4503       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4504       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4505       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4506       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4507     }
4508
4509     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4510     // TODO - support non-uniform vector amounts.
4511     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4512       // Match a shared variable operand and 2 non-opaque constant operands.
4513       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4514       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4515       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4516         // Canonicalize larger constant as C0.
4517         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4518           std::swap(C0, C1);
4519
4520         // The difference of the constants must be a single bit.
4521         const APInt &C0Val = C0->getAPIntValue();
4522         const APInt &C1Val = C1->getAPIntValue();
4523         if ((C0Val - C1Val).isPowerOf2()) {
4524           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4525           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4526           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4527           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4528           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4529           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4530           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4531           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4532         }
4533       }
4534     }
4535   }
4536
4537   // Canonicalize equivalent operands to LL == RL.
4538   if (LL == RR && LR == RL) {
4539     CC1 = ISD::getSetCCSwappedOperands(CC1);
4540     std::swap(RL, RR);
4541   }
4542
4543   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4544   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4545   if (LL == RL && LR == RR) {
4546     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4547                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4548     if (NewCC != ISD::SETCC_INVALID &&
4549         (!LegalOperations ||
4550          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4551           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4552       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4553   }
4554
4555   return SDValue();
4556 }
4557
4558 /// This contains all DAGCombine rules which reduce two values combined by
4559 /// an And operation to a single value. This makes them reusable in the context
4560 /// of visitSELECT(). Rules involving constants are not included as
4561 /// visitSELECT() already handles those cases.
4562 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4563   EVT VT = N1.getValueType();
4564   SDLoc DL(N);
4565
4566   // fold (and x, undef) -> 0
4567   if (N0.isUndef() || N1.isUndef())
4568     return DAG.getConstant(0, DL, VT);
4569
4570   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4571     return V;
4572
4573   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4574       VT.getSizeInBits() <= 64) {
4575     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4576       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4577         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4578         // immediate for an add, but it is legal if its top c2 bits are set,
4579         // transform the ADD so the immediate doesn't need to be materialized
4580         // in a register.
4581         APInt ADDC = ADDI->getAPIntValue();
4582         APInt SRLC = SRLI->getAPIntValue();
4583         if (ADDC.getMinSignedBits() <= 64 &&
4584             SRLC.ult(VT.getSizeInBits()) &&
4585             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4586           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4587                                              SRLC.getZExtValue());
4588           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4589             ADDC |= Mask;
4590             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4591               SDLoc DL0(N0);
4592               SDValue NewAdd =
4593                 DAG.getNode(ISD::ADD, DL0, VT,
4594                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4595               CombineTo(N0.getNode(), NewAdd);
4596               // Return N so it doesn't get rechecked!
4597               return SDValue(N, 0);
4598             }
4599           }
4600         }
4601       }
4602     }
4603   }
4604
4605   // Reduce bit extract of low half of an integer to the narrower type.
4606   // (and (srl i64:x, K), KMask) ->
4607   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4608   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4609     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4610       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4611         unsigned Size = VT.getSizeInBits();
4612         const APInt &AndMask = CAnd->getAPIntValue();
4613         unsigned ShiftBits = CShift->getZExtValue();
4614
4615         // Bail out, this node will probably disappear anyway.
4616         if (ShiftBits == 0)
4617           return SDValue();
4618
4619         unsigned MaskBits = AndMask.countTrailingOnes();
4620         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4621
4622         if (AndMask.isMask() &&
4623             // Required bits must not span the two halves of the integer and
4624             // must fit in the half size type.
4625             (ShiftBits + MaskBits <= Size / 2) &&
4626             TLI.isNarrowingProfitable(VT, HalfVT) &&
4627             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4628             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4629             TLI.isTruncateFree(VT, HalfVT) &&
4630             TLI.isZExtFree(HalfVT, VT)) {
4631           // The isNarrowingProfitable is to avoid regressions on PPC and
4632           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4633           // on downstream users of this. Those patterns could probably be
4634           // extended to handle extensions mixed in.
4635
4636           SDValue SL(N0);
4637           assert(MaskBits <= Size);
4638
4639           // Extracting the highest bit of the low half.
4640           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4641           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4642                                       N0.getOperand(0));
4643
4644           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4645           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4646           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4647           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4648           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4649         }
4650       }
4651     }
4652   }
4653
4654   return SDValue();
4655 }
4656
4657 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4658                                    EVT LoadResultTy, EVT &ExtVT) {
4659   if (!AndC->getAPIntValue().isMask())
4660     return false;
4661
4662   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4663
4664   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4665   EVT LoadedVT = LoadN->getMemoryVT();
4666
4667   if (ExtVT == LoadedVT &&
4668       (!LegalOperations ||
4669        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4670     // ZEXTLOAD will match without needing to change the size of the value being
4671     // loaded.
4672     return true;
4673   }
4674
4675   // Do not change the width of a volatile or atomic loads.
4676   if (!LoadN->isSimple())
4677     return false;
4678
4679   // Do not generate loads of non-round integer types since these can
4680   // be expensive (and would be wrong if the type is not byte sized).
4681   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4682     return false;
4683
4684   if (LegalOperations &&
4685       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4686     return false;
4687
4688   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4689     return false;
4690
4691   return true;
4692 }
4693
4694 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4695                                     ISD::LoadExtType ExtType, EVT &MemVT,
4696                                     unsigned ShAmt) {
4697   if (!LDST)
4698     return false;
4699   // Only allow byte offsets.
4700   if (ShAmt % 8)
4701     return false;
4702
4703   // Do not generate loads of non-round integer types since these can
4704   // be expensive (and would be wrong if the type is not byte sized).
4705   if (!MemVT.isRound())
4706     return false;
4707
4708   // Don't change the width of a volatile or atomic loads.
4709   if (!LDST->isSimple())
4710     return false;
4711
4712   // Verify that we are actually reducing a load width here.
4713   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4714     return false;
4715
4716   // Ensure that this isn't going to produce an unsupported memory access.
4717   if (ShAmt &&
4718       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4719                               LDST->getAddressSpace(), ShAmt / 8,
4720                               LDST->getMemOperand()->getFlags()))
4721     return false;
4722
4723   // It's not possible to generate a constant of extended or untyped type.
4724   EVT PtrType = LDST->getBasePtr().getValueType();
4725   if (PtrType == MVT::Untyped || PtrType.isExtended())
4726     return false;
4727
4728   if (isa<LoadSDNode>(LDST)) {
4729     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4730     // Don't transform one with multiple uses, this would require adding a new
4731     // load.
4732     if (!SDValue(Load, 0).hasOneUse())
4733       return false;
4734
4735     if (LegalOperations &&
4736         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4737       return false;
4738
4739     // For the transform to be legal, the load must produce only two values
4740     // (the value loaded and the chain).  Don't transform a pre-increment
4741     // load, for example, which produces an extra value.  Otherwise the
4742     // transformation is not equivalent, and the downstream logic to replace
4743     // uses gets things wrong.
4744     if (Load->getNumValues() > 2)
4745       return false;
4746
4747     // If the load that we're shrinking is an extload and we're not just
4748     // discarding the extension we can't simply shrink the load. Bail.
4749     // TODO: It would be possible to merge the extensions in some cases.
4750     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4751         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4752       return false;
4753
4754     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4755       return false;
4756   } else {
4757     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4758     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4759     // Can't write outside the original store
4760     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4761       return false;
4762
4763     if (LegalOperations &&
4764         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4765       return false;
4766   }
4767   return true;
4768 }
4769
4770 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4771                                     SmallVectorImpl<LoadSDNode*> &Loads,
4772                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4773                                     ConstantSDNode *Mask,
4774                                     SDNode *&NodeToMask) {
4775   // Recursively search for the operands, looking for loads which can be
4776   // narrowed.
4777   for (SDValue Op : N->op_values()) {
4778     if (Op.getValueType().isVector())
4779       return false;
4780
4781     // Some constants may need fixing up later if they are too large.
4782     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4783       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4784           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4785         NodesWithConsts.insert(N);
4786       continue;
4787     }
4788
4789     if (!Op.hasOneUse())
4790       return false;
4791
4792     switch(Op.getOpcode()) {
4793     case ISD::LOAD: {
4794       auto *Load = cast<LoadSDNode>(Op);
4795       EVT ExtVT;
4796       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
4797           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
4798
4799         // ZEXTLOAD is already small enough.
4800         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
4801             ExtVT.bitsGE(Load->getMemoryVT()))
4802           continue;
4803
4804         // Use LE to convert equal sized loads to zext.
4805         if (ExtVT.bitsLE(Load->getMemoryVT()))
4806           Loads.push_back(Load);
4807
4808         continue;
4809       }
4810       return false;
4811     }
4812     case ISD::ZERO_EXTEND:
4813     case ISD::AssertZext: {
4814       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
4815       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4816       EVT VT = Op.getOpcode() == ISD::AssertZext ?
4817         cast<VTSDNode>(Op.getOperand(1))->getVT() :
4818         Op.getOperand(0).getValueType();
4819
4820       // We can accept extending nodes if the mask is wider or an equal
4821       // width to the original type.
4822       if (ExtVT.bitsGE(VT))
4823         continue;
4824       break;
4825     }
4826     case ISD::OR:
4827     case ISD::XOR:
4828     case ISD::AND:
4829       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
4830                              NodeToMask))
4831         return false;
4832       continue;
4833     }
4834
4835     // Allow one node which will masked along with any loads found.
4836     if (NodeToMask)
4837       return false;
4838
4839     // Also ensure that the node to be masked only produces one data result.
4840     NodeToMask = Op.getNode();
4841     if (NodeToMask->getNumValues() > 1) {
4842       bool HasValue = false;
4843       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
4844         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
4845         if (VT != MVT::Glue && VT != MVT::Other) {
4846           if (HasValue) {
4847             NodeToMask = nullptr;
4848             return false;
4849           }
4850           HasValue = true;
4851         }
4852       }
4853       assert(HasValue && "Node to be masked has no data result?");
4854     }
4855   }
4856   return true;
4857 }
4858
4859 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
4860   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
4861   if (!Mask)
4862     return false;
4863
4864   if (!Mask->getAPIntValue().isMask())
4865     return false;
4866
4867   // No need to do anything if the and directly uses a load.
4868   if (isa<LoadSDNode>(N->getOperand(0)))
4869     return false;
4870
4871   SmallVector<LoadSDNode*, 8> Loads;
4872   SmallPtrSet<SDNode*, 2> NodesWithConsts;
4873   SDNode *FixupNode = nullptr;
4874   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
4875     if (Loads.size() == 0)
4876       return false;
4877
4878     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
4879     SDValue MaskOp = N->getOperand(1);
4880
4881     // If it exists, fixup the single node we allow in the tree that needs
4882     // masking.
4883     if (FixupNode) {
4884       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
4885       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
4886                                 FixupNode->getValueType(0),
4887                                 SDValue(FixupNode, 0), MaskOp);
4888       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
4889       if (And.getOpcode() == ISD ::AND)
4890         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
4891     }
4892
4893     // Narrow any constants that need it.
4894     for (auto *LogicN : NodesWithConsts) {
4895       SDValue Op0 = LogicN->getOperand(0);
4896       SDValue Op1 = LogicN->getOperand(1);
4897
4898       if (isa<ConstantSDNode>(Op0))
4899           std::swap(Op0, Op1);
4900
4901       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
4902                                 Op1, MaskOp);
4903
4904       DAG.UpdateNodeOperands(LogicN, Op0, And);
4905     }
4906
4907     // Create narrow loads.
4908     for (auto *Load : Loads) {
4909       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
4910       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
4911                                 SDValue(Load, 0), MaskOp);
4912       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
4913       if (And.getOpcode() == ISD ::AND)
4914         And = SDValue(
4915             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
4916       SDValue NewLoad = ReduceLoadWidth(And.getNode());
4917       assert(NewLoad &&
4918              "Shouldn't be masking the load if it can't be narrowed");
4919       CombineTo(Load, NewLoad, NewLoad.getValue(1));
4920     }
4921     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
4922     return true;
4923   }
4924   return false;
4925 }
4926
4927 // Unfold
4928 //    x &  (-1 'logical shift' y)
4929 // To
4930 //    (x 'opposite logical shift' y) 'logical shift' y
4931 // if it is better for performance.
4932 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
4933   assert(N->getOpcode() == ISD::AND);
4934
4935   SDValue N0 = N->getOperand(0);
4936   SDValue N1 = N->getOperand(1);
4937
4938   // Do we actually prefer shifts over mask?
4939   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
4940     return SDValue();
4941
4942   // Try to match  (-1 '[outer] logical shift' y)
4943   unsigned OuterShift;
4944   unsigned InnerShift; // The opposite direction to the OuterShift.
4945   SDValue Y;           // Shift amount.
4946   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
4947     if (!M.hasOneUse())
4948       return false;
4949     OuterShift = M->getOpcode();
4950     if (OuterShift == ISD::SHL)
4951       InnerShift = ISD::SRL;
4952     else if (OuterShift == ISD::SRL)
4953       InnerShift = ISD::SHL;
4954     else
4955       return false;
4956     if (!isAllOnesConstant(M->getOperand(0)))
4957       return false;
4958     Y = M->getOperand(1);
4959     return true;
4960   };
4961
4962   SDValue X;
4963   if (matchMask(N1))
4964     X = N0;
4965   else if (matchMask(N0))
4966     X = N1;
4967   else
4968     return SDValue();
4969
4970   SDLoc DL(N);
4971   EVT VT = N->getValueType(0);
4972
4973   //     tmp = x   'opposite logical shift' y
4974   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
4975   //     ret = tmp 'logical shift' y
4976   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
4977
4978   return T1;
4979 }
4980
4981 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
4982 /// For a target with a bit test, this is expected to become test + set and save
4983 /// at least 1 instruction.
4984 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
4985   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
4986
4987   // This is probably not worthwhile without a supported type.
4988   EVT VT = And->getValueType(0);
4989   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4990   if (!TLI.isTypeLegal(VT))
4991     return SDValue();
4992
4993   // Look through an optional extension and find a 'not'.
4994   // TODO: Should we favor test+set even without the 'not' op?
4995   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
4996   if (Not.getOpcode() == ISD::ANY_EXTEND)
4997     Not = Not.getOperand(0);
4998   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
4999     return SDValue();
5000
5001   // Look though an optional truncation. The source operand may not be the same
5002   // type as the original 'and', but that is ok because we are masking off
5003   // everything but the low bit.
5004   SDValue Srl = Not.getOperand(0);
5005   if (Srl.getOpcode() == ISD::TRUNCATE)
5006     Srl = Srl.getOperand(0);
5007
5008   // Match a shift-right by constant.
5009   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5010       !isa<ConstantSDNode>(Srl.getOperand(1)))
5011     return SDValue();
5012
5013   // We might have looked through casts that make this transform invalid.
5014   // TODO: If the source type is wider than the result type, do the mask and
5015   //       compare in the source type.
5016   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5017   unsigned VTBitWidth = VT.getSizeInBits();
5018   if (ShiftAmt.uge(VTBitWidth))
5019     return SDValue();
5020
5021   // Turn this into a bit-test pattern using mask op + setcc:
5022   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5023   SDLoc DL(And);
5024   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5025   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5026   SDValue Mask = DAG.getConstant(
5027       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5028   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5029   SDValue Zero = DAG.getConstant(0, DL, VT);
5030   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5031   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5032 }
5033
5034 SDValue DAGCombiner::visitAND(SDNode *N) {
5035   SDValue N0 = N->getOperand(0);
5036   SDValue N1 = N->getOperand(1);
5037   EVT VT = N1.getValueType();
5038
5039   // x & x --> x
5040   if (N0 == N1)
5041     return N0;
5042
5043   // fold vector ops
5044   if (VT.isVector()) {
5045     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5046       return FoldedVOp;
5047
5048     // fold (and x, 0) -> 0, vector edition
5049     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5050       // do not return N0, because undef node may exist in N0
5051       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5052                              SDLoc(N), N0.getValueType());
5053     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5054       // do not return N1, because undef node may exist in N1
5055       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5056                              SDLoc(N), N1.getValueType());
5057
5058     // fold (and x, -1) -> x, vector edition
5059     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5060       return N1;
5061     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5062       return N0;
5063   }
5064
5065   // fold (and c1, c2) -> c1&c2
5066   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5067   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5068   if (N0C && N1C && !N1C->isOpaque())
5069     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
5070   // canonicalize constant to RHS
5071   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5072       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5073     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5074   // fold (and x, -1) -> x
5075   if (isAllOnesConstant(N1))
5076     return N0;
5077   // if (and x, c) is known to be zero, return 0
5078   unsigned BitWidth = VT.getScalarSizeInBits();
5079   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5080                                    APInt::getAllOnesValue(BitWidth)))
5081     return DAG.getConstant(0, SDLoc(N), VT);
5082
5083   if (SDValue NewSel = foldBinOpIntoSelect(N))
5084     return NewSel;
5085
5086   // reassociate and
5087   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5088     return RAND;
5089
5090   // Try to convert a constant mask AND into a shuffle clear mask.
5091   if (VT.isVector())
5092     if (SDValue Shuffle = XformToShuffleWithZero(N))
5093       return Shuffle;
5094
5095   // fold (and (or x, C), D) -> D if (C & D) == D
5096   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5097     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5098   };
5099   if (N0.getOpcode() == ISD::OR &&
5100       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5101     return N1;
5102   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5103   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5104     SDValue N0Op0 = N0.getOperand(0);
5105     APInt Mask = ~N1C->getAPIntValue();
5106     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5107     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5108       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5109                                  N0.getValueType(), N0Op0);
5110
5111       // Replace uses of the AND with uses of the Zero extend node.
5112       CombineTo(N, Zext);
5113
5114       // We actually want to replace all uses of the any_extend with the
5115       // zero_extend, to avoid duplicating things.  This will later cause this
5116       // AND to be folded.
5117       CombineTo(N0.getNode(), Zext);
5118       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5119     }
5120   }
5121
5122   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5123   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5124   // already be zero by virtue of the width of the base type of the load.
5125   //
5126   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5127   // more cases.
5128   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5129        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5130        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5131        N0.getOperand(0).getResNo() == 0) ||
5132       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5133     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5134                                          N0 : N0.getOperand(0) );
5135
5136     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5137     // This can be a pure constant or a vector splat, in which case we treat the
5138     // vector as a scalar and use the splat value.
5139     APInt Constant = APInt::getNullValue(1);
5140     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5141       Constant = C->getAPIntValue();
5142     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5143       APInt SplatValue, SplatUndef;
5144       unsigned SplatBitSize;
5145       bool HasAnyUndefs;
5146       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5147                                              SplatBitSize, HasAnyUndefs);
5148       if (IsSplat) {
5149         // Undef bits can contribute to a possible optimisation if set, so
5150         // set them.
5151         SplatValue |= SplatUndef;
5152
5153         // The splat value may be something like "0x00FFFFFF", which means 0 for
5154         // the first vector value and FF for the rest, repeating. We need a mask
5155         // that will apply equally to all members of the vector, so AND all the
5156         // lanes of the constant together.
5157         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5158
5159         // If the splat value has been compressed to a bitlength lower
5160         // than the size of the vector lane, we need to re-expand it to
5161         // the lane size.
5162         if (EltBitWidth > SplatBitSize)
5163           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5164                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5165             SplatValue |= SplatValue.shl(SplatBitSize);
5166
5167         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
5168         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
5169         if ((SplatBitSize % EltBitWidth) == 0) {
5170           Constant = APInt::getAllOnesValue(EltBitWidth);
5171           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5172             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5173         }
5174       }
5175     }
5176
5177     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5178     // actually legal and isn't going to get expanded, else this is a false
5179     // optimisation.
5180     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5181                                                     Load->getValueType(0),
5182                                                     Load->getMemoryVT());
5183
5184     // Resize the constant to the same size as the original memory access before
5185     // extension. If it is still the AllOnesValue then this AND is completely
5186     // unneeded.
5187     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5188
5189     bool B;
5190     switch (Load->getExtensionType()) {
5191     default: B = false; break;
5192     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5193     case ISD::ZEXTLOAD:
5194     case ISD::NON_EXTLOAD: B = true; break;
5195     }
5196
5197     if (B && Constant.isAllOnesValue()) {
5198       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5199       // preserve semantics once we get rid of the AND.
5200       SDValue NewLoad(Load, 0);
5201
5202       // Fold the AND away. NewLoad may get replaced immediately.
5203       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5204
5205       if (Load->getExtensionType() == ISD::EXTLOAD) {
5206         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5207                               Load->getValueType(0), SDLoc(Load),
5208                               Load->getChain(), Load->getBasePtr(),
5209                               Load->getOffset(), Load->getMemoryVT(),
5210                               Load->getMemOperand());
5211         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5212         if (Load->getNumValues() == 3) {
5213           // PRE/POST_INC loads have 3 values.
5214           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5215                            NewLoad.getValue(2) };
5216           CombineTo(Load, To, 3, true);
5217         } else {
5218           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5219         }
5220       }
5221
5222       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5223     }
5224   }
5225
5226   // fold (and (load x), 255) -> (zextload x, i8)
5227   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5228   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5229   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5230                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5231                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5232     if (SDValue Res = ReduceLoadWidth(N)) {
5233       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5234         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5235       AddToWorklist(N);
5236       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5237       return SDValue(N, 0);
5238     }
5239   }
5240
5241   if (Level >= AfterLegalizeTypes) {
5242     // Attempt to propagate the AND back up to the leaves which, if they're
5243     // loads, can be combined to narrow loads and the AND node can be removed.
5244     // Perform after legalization so that extend nodes will already be
5245     // combined into the loads.
5246     if (BackwardsPropagateMask(N, DAG)) {
5247       return SDValue(N, 0);
5248     }
5249   }
5250
5251   if (SDValue Combined = visitANDLike(N0, N1, N))
5252     return Combined;
5253
5254   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5255   if (N0.getOpcode() == N1.getOpcode())
5256     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5257       return V;
5258
5259   // Masking the negated extension of a boolean is just the zero-extended
5260   // boolean:
5261   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5262   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5263   //
5264   // Note: the SimplifyDemandedBits fold below can make an information-losing
5265   // transform, and then we have no way to find this better fold.
5266   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5267     if (isNullOrNullSplat(N0.getOperand(0))) {
5268       SDValue SubRHS = N0.getOperand(1);
5269       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5270           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5271         return SubRHS;
5272       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5273           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5274         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5275     }
5276   }
5277
5278   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5279   // fold (and (sra)) -> (and (srl)) when possible.
5280   if (SimplifyDemandedBits(SDValue(N, 0)))
5281     return SDValue(N, 0);
5282
5283   // fold (zext_inreg (extload x)) -> (zextload x)
5284   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5285   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5286       (ISD::isEXTLoad(N0.getNode()) ||
5287        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5288     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5289     EVT MemVT = LN0->getMemoryVT();
5290     // If we zero all the possible extended bits, then we can turn this into
5291     // a zextload if we are running before legalize or the operation is legal.
5292     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5293     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5294     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5295     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5296         ((!LegalOperations && LN0->isSimple()) ||
5297          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5298       SDValue ExtLoad =
5299           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5300                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5301       AddToWorklist(N);
5302       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5303       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5304     }
5305   }
5306
5307   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5308   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5309     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5310                                            N0.getOperand(1), false))
5311       return BSwap;
5312   }
5313
5314   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5315     return Shifts;
5316
5317   if (TLI.hasBitTest(N0, N1))
5318     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5319       return V;
5320
5321   return SDValue();
5322 }
5323
5324 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5325 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5326                                         bool DemandHighBits) {
5327   if (!LegalOperations)
5328     return SDValue();
5329
5330   EVT VT = N->getValueType(0);
5331   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5332     return SDValue();
5333   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5334     return SDValue();
5335
5336   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5337   bool LookPassAnd0 = false;
5338   bool LookPassAnd1 = false;
5339   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5340       std::swap(N0, N1);
5341   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5342       std::swap(N0, N1);
5343   if (N0.getOpcode() == ISD::AND) {
5344     if (!N0.getNode()->hasOneUse())
5345       return SDValue();
5346     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5347     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5348     // This is needed for X86.
5349     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5350                   N01C->getZExtValue() != 0xFFFF))
5351       return SDValue();
5352     N0 = N0.getOperand(0);
5353     LookPassAnd0 = true;
5354   }
5355
5356   if (N1.getOpcode() == ISD::AND) {
5357     if (!N1.getNode()->hasOneUse())
5358       return SDValue();
5359     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5360     if (!N11C || N11C->getZExtValue() != 0xFF)
5361       return SDValue();
5362     N1 = N1.getOperand(0);
5363     LookPassAnd1 = true;
5364   }
5365
5366   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5367     std::swap(N0, N1);
5368   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5369     return SDValue();
5370   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5371     return SDValue();
5372
5373   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5374   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5375   if (!N01C || !N11C)
5376     return SDValue();
5377   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5378     return SDValue();
5379
5380   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5381   SDValue N00 = N0->getOperand(0);
5382   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5383     if (!N00.getNode()->hasOneUse())
5384       return SDValue();
5385     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5386     if (!N001C || N001C->getZExtValue() != 0xFF)
5387       return SDValue();
5388     N00 = N00.getOperand(0);
5389     LookPassAnd0 = true;
5390   }
5391
5392   SDValue N10 = N1->getOperand(0);
5393   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5394     if (!N10.getNode()->hasOneUse())
5395       return SDValue();
5396     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5397     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5398     // for X86.
5399     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5400                    N101C->getZExtValue() != 0xFFFF))
5401       return SDValue();
5402     N10 = N10.getOperand(0);
5403     LookPassAnd1 = true;
5404   }
5405
5406   if (N00 != N10)
5407     return SDValue();
5408
5409   // Make sure everything beyond the low halfword gets set to zero since the SRL
5410   // 16 will clear the top bits.
5411   unsigned OpSizeInBits = VT.getSizeInBits();
5412   if (DemandHighBits && OpSizeInBits > 16) {
5413     // If the left-shift isn't masked out then the only way this is a bswap is
5414     // if all bits beyond the low 8 are 0. In that case the entire pattern
5415     // reduces to a left shift anyway: leave it for other parts of the combiner.
5416     if (!LookPassAnd0)
5417       return SDValue();
5418
5419     // However, if the right shift isn't masked out then it might be because
5420     // it's not needed. See if we can spot that too.
5421     if (!LookPassAnd1 &&
5422         !DAG.MaskedValueIsZero(
5423             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5424       return SDValue();
5425   }
5426
5427   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5428   if (OpSizeInBits > 16) {
5429     SDLoc DL(N);
5430     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5431                       DAG.getConstant(OpSizeInBits - 16, DL,
5432                                       getShiftAmountTy(VT)));
5433   }
5434   return Res;
5435 }
5436
5437 /// Return true if the specified node is an element that makes up a 32-bit
5438 /// packed halfword byteswap.
5439 /// ((x & 0x000000ff) << 8) |
5440 /// ((x & 0x0000ff00) >> 8) |
5441 /// ((x & 0x00ff0000) << 8) |
5442 /// ((x & 0xff000000) >> 8)
5443 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5444   if (!N.getNode()->hasOneUse())
5445     return false;
5446
5447   unsigned Opc = N.getOpcode();
5448   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5449     return false;
5450
5451   SDValue N0 = N.getOperand(0);
5452   unsigned Opc0 = N0.getOpcode();
5453   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5454     return false;
5455
5456   ConstantSDNode *N1C = nullptr;
5457   // SHL or SRL: look upstream for AND mask operand
5458   if (Opc == ISD::AND)
5459     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5460   else if (Opc0 == ISD::AND)
5461     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5462   if (!N1C)
5463     return false;
5464
5465   unsigned MaskByteOffset;
5466   switch (N1C->getZExtValue()) {
5467   default:
5468     return false;
5469   case 0xFF:       MaskByteOffset = 0; break;
5470   case 0xFF00:     MaskByteOffset = 1; break;
5471   case 0xFFFF:
5472     // In case demanded bits didn't clear the bits that will be shifted out.
5473     // This is needed for X86.
5474     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5475       MaskByteOffset = 1;
5476       break;
5477     }
5478     return false;
5479   case 0xFF0000:   MaskByteOffset = 2; break;
5480   case 0xFF000000: MaskByteOffset = 3; break;
5481   }
5482
5483   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5484   if (Opc == ISD::AND) {
5485     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5486       // (x >> 8) & 0xff
5487       // (x >> 8) & 0xff0000
5488       if (Opc0 != ISD::SRL)
5489         return false;
5490       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5491       if (!C || C->getZExtValue() != 8)
5492         return false;
5493     } else {
5494       // (x << 8) & 0xff00
5495       // (x << 8) & 0xff000000
5496       if (Opc0 != ISD::SHL)
5497         return false;
5498       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5499       if (!C || C->getZExtValue() != 8)
5500         return false;
5501     }
5502   } else if (Opc == ISD::SHL) {
5503     // (x & 0xff) << 8
5504     // (x & 0xff0000) << 8
5505     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5506       return false;
5507     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5508     if (!C || C->getZExtValue() != 8)
5509       return false;
5510   } else { // Opc == ISD::SRL
5511     // (x & 0xff00) >> 8
5512     // (x & 0xff000000) >> 8
5513     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5514       return false;
5515     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5516     if (!C || C->getZExtValue() != 8)
5517       return false;
5518   }
5519
5520   if (Parts[MaskByteOffset])
5521     return false;
5522
5523   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5524   return true;
5525 }
5526
5527 // Match 2 elements of a packed halfword bswap.
5528 static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
5529   if (N.getOpcode() == ISD::OR)
5530     return isBSwapHWordElement(N.getOperand(0), Parts) &&
5531            isBSwapHWordElement(N.getOperand(1), Parts);
5532
5533   if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
5534     ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
5535     if (!C || C->getAPIntValue() != 16)
5536       return false;
5537     Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
5538     return true;
5539   }
5540
5541   return false;
5542 }
5543
5544 /// Match a 32-bit packed halfword bswap. That is
5545 /// ((x & 0x000000ff) << 8) |
5546 /// ((x & 0x0000ff00) >> 8) |
5547 /// ((x & 0x00ff0000) << 8) |
5548 /// ((x & 0xff000000) >> 8)
5549 /// => (rotl (bswap x), 16)
5550 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5551   if (!LegalOperations)
5552     return SDValue();
5553
5554   EVT VT = N->getValueType(0);
5555   if (VT != MVT::i32)
5556     return SDValue();
5557   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5558     return SDValue();
5559
5560   // Look for either
5561   // (or (bswaphpair), (bswaphpair))
5562   // (or (or (bswaphpair), (and)), (and))
5563   // (or (or (and), (bswaphpair)), (and))
5564   SDNode *Parts[4] = {};
5565
5566   if (isBSwapHWordPair(N0, Parts)) {
5567     // (or (or (and), (and)), (or (and), (and)))
5568     if (!isBSwapHWordPair(N1, Parts))
5569       return SDValue();
5570   } else if (N0.getOpcode() == ISD::OR) {
5571     // (or (or (or (and), (and)), (and)), (and))
5572     if (!isBSwapHWordElement(N1, Parts))
5573       return SDValue();
5574     SDValue N00 = N0.getOperand(0);
5575     SDValue N01 = N0.getOperand(1);
5576     if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
5577         !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
5578       return SDValue();
5579   } else
5580     return SDValue();
5581
5582   // Make sure the parts are all coming from the same node.
5583   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5584     return SDValue();
5585
5586   SDLoc DL(N);
5587   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5588                               SDValue(Parts[0], 0));
5589
5590   // Result of the bswap should be rotated by 16. If it's not legal, then
5591   // do  (x << 16) | (x >> 16).
5592   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5593   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5594     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5595   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5596     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5597   return DAG.getNode(ISD::OR, DL, VT,
5598                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5599                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5600 }
5601
5602 /// This contains all DAGCombine rules which reduce two values combined by
5603 /// an Or operation to a single value \see visitANDLike().
5604 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5605   EVT VT = N1.getValueType();
5606   SDLoc DL(N);
5607
5608   // fold (or x, undef) -> -1
5609   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5610     return DAG.getAllOnesConstant(DL, VT);
5611
5612   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5613     return V;
5614
5615   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5616   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5617       // Don't increase # computations.
5618       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5619     // We can only do this xform if we know that bits from X that are set in C2
5620     // but not in C1 are already zero.  Likewise for Y.
5621     if (const ConstantSDNode *N0O1C =
5622         getAsNonOpaqueConstant(N0.getOperand(1))) {
5623       if (const ConstantSDNode *N1O1C =
5624           getAsNonOpaqueConstant(N1.getOperand(1))) {
5625         // We can only do this xform if we know that bits from X that are set in
5626         // C2 but not in C1 are already zero.  Likewise for Y.
5627         const APInt &LHSMask = N0O1C->getAPIntValue();
5628         const APInt &RHSMask = N1O1C->getAPIntValue();
5629
5630         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5631             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5632           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5633                                   N0.getOperand(0), N1.getOperand(0));
5634           return DAG.getNode(ISD::AND, DL, VT, X,
5635                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5636         }
5637       }
5638     }
5639   }
5640
5641   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5642   if (N0.getOpcode() == ISD::AND &&
5643       N1.getOpcode() == ISD::AND &&
5644       N0.getOperand(0) == N1.getOperand(0) &&
5645       // Don't increase # computations.
5646       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5647     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5648                             N0.getOperand(1), N1.getOperand(1));
5649     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5650   }
5651
5652   return SDValue();
5653 }
5654
5655 /// OR combines for which the commuted variant will be tried as well.
5656 static SDValue visitORCommutative(
5657     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5658   EVT VT = N0.getValueType();
5659   if (N0.getOpcode() == ISD::AND) {
5660     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5661     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5662       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5663
5664     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5665     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5666       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5667   }
5668
5669   return SDValue();
5670 }
5671
/// Combine an OR node.
///
/// Tries a fixed sequence of folds on \p N and returns as soon as one applies.
/// The order of the folds matters: later ones rely on earlier ones not having
/// matched. Returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedBits reported a change, or an empty SDValue when no fold
/// applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // x | x --> x
  if (N0 == N1)
    return N0;

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      // (Exactly one of the two inputs of each shuffle must be all-zeros.)
      if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        SmallVector<int, 4> Mask(NumElts);

        // Build the combined mask lane by lane; give up as soon as a lane
        // cannot be expressed as "zero from one side, value from the other".
        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          // Use the non-zero input of each original shuffle as the operands
          // of the merged shuffle.
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          SDValue LegalShuffle =
              TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
                                          Mask, DAG);
          if (LegalShuffle)
            return LegalShuffle;
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;

  if (SDValue NewSel = foldBinOpIntoSelect(N))
    return NewSel;

  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + srl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
    return ROR;

  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 or c1/c2 are undef.
  auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
    return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
  };
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
    // Only proceed if c1|c2 actually constant-folds.
    if (SDValue COR = DAG.FoldConstantArithmetic(
            ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
      SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
      // Queue the new inner OR so it gets a chance to be combined as well.
      AddToWorklist(IOR.getNode());
      return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
    }
  }

  // Try the commutative-only folds with the operands in both orders.
  if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
    return Combined;
  if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
    return Combined;

  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
      return V;

  // See if this is some rotate idiom.
  if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
    return Rot;

  if (SDValue Load = MatchLoadCombine(N))
    return Load;

  // Simplify the operands using demanded-bits information.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If OR can be rewritten into ADD, try combines based on ADD.
  // (Operands that share no set bits produce the same result under OR and
  // ADD.)
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
      DAG.haveNoCommonBitsSet(N0, N1))
    if (SDValue Combined = visitADDLike(N))
      return Combined;

  return SDValue();
}
5844
5845 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
5846   if (Op.getOpcode() == ISD::AND &&
5847       DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
5848     Mask = Op.getOperand(1);
5849     return Op.getOperand(0);
5850   }
5851   return Op;
5852 }
5853
5854 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
5855 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
5856                             SDValue &Mask) {
5857   Op = stripConstantMask(DAG, Op, Mask);
5858   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
5859     Shift = Op;
5860     return true;
5861   }
5862   return false;
5863 }
5864
5865 /// Helper function for visitOR to extract the needed side of a rotate idiom
5866 /// from a shl/srl/mul/udiv.  This is meant to handle cases where
5867 /// InstCombine merged some outside op with one of the shifts from
5868 /// the rotate pattern.
5869 /// \returns An empty \c SDValue if the needed shift couldn't be extracted.
5870 /// Otherwise, returns an expansion of \p ExtractFrom based on the following
5871 /// patterns:
5872 ///
5873 ///   (or (add v v) (shrl v bitwidth-1)):
5874 ///     expands (add v v) -> (shl v 1)
5875 ///
5876 ///   (or (mul v c0) (shrl (mul v c1) c2)):
5877 ///     expands (mul v c0) -> (shl (mul v c1) c3)
5878 ///
5879 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
5880 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
5881 ///
5882 ///   (or (shl v c0) (shrl (shl v c1) c2)):
5883 ///     expands (shl v c0) -> (shl (shl v c1) c3)
5884 ///
5885 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
5886 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
5887 ///
5888 /// Such that in all cases, c3+c2==bitwidth(op v c1).
5889 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
5890                                      SDValue ExtractFrom, SDValue &Mask,
5891                                      const SDLoc &DL) {
5892   assert(OppShift && ExtractFrom && "Empty SDValue");
5893   assert(
5894       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
5895       "Existing shift must be valid as a rotate half");
5896
5897   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
5898
5899   // Value and Type of the shift.
5900   SDValue OppShiftLHS = OppShift.getOperand(0);
5901   EVT ShiftedVT = OppShiftLHS.getValueType();
5902
5903   // Amount of the existing shift.
5904   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
5905
5906   // (add v v) -> (shl v 1)
5907   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
5908       ExtractFrom.getOpcode() == ISD::ADD &&
5909       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
5910       ExtractFrom.getOperand(0) == OppShiftLHS &&
5911       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
5912     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
5913                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
5914
5915   // Preconditions:
5916   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
5917   //
5918   // Find opcode of the needed shift to be extracted from (op0 v c0).
5919   unsigned Opcode = ISD::DELETED_NODE;
5920   bool IsMulOrDiv = false;
5921   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
5922   // opcode or its arithmetic (mul or udiv) variant.
5923   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
5924     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
5925     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
5926       return false;
5927     Opcode = NeededShift;
5928     return true;
5929   };
5930   // op0 must be either the needed shift opcode or the mul/udiv equivalent
5931   // that the needed shift can be extracted from.
5932   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
5933       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
5934     return SDValue();
5935
5936   // op0 must be the same opcode on both sides, have the same LHS argument,
5937   // and produce the same value type.
5938   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
5939       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
5940       ShiftedVT != ExtractFrom.getValueType())
5941     return SDValue();
5942
5943   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
5944   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
5945   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
5946   ConstantSDNode *ExtractFromCst =
5947       isConstOrConstSplat(ExtractFrom.getOperand(1));
5948   // TODO: We should be able to handle non-uniform constant vectors for these values
5949   // Check that we have constant values.
5950   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
5951       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
5952       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
5953     return SDValue();
5954
5955   // Compute the shift amount we need to extract to complete the rotate.
5956   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
5957   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
5958     return SDValue();
5959   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
5960   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
5961   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
5962   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
5963   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
5964
5965   // Now try extract the needed shift from the ExtractFrom op and see if the
5966   // result matches up with the existing shift's LHS op.
5967   if (IsMulOrDiv) {
5968     // Op to extract from is a mul or udiv by a constant.
5969     // Check:
5970     //     c2 / (1 << (bitwidth(op0 v c0) - c1)) == c0
5971     //     c2 % (1 << (bitwidth(op0 v c0) - c1)) == 0
5972     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
5973                                                  NeededShiftAmt.getZExtValue());
5974     APInt ResultAmt;
5975     APInt Rem;
5976     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
5977     if (Rem != 0 || ResultAmt != OppLHSAmt)
5978       return SDValue();
5979   } else {
5980     // Op to extract from is a shift by a constant.
5981     // Check:
5982     //      c2 - (bitwidth(op0 v c0) - c1) == c0
5983     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
5984                                           ExtractFromAmt.getBitWidth()))
5985       return SDValue();
5986   }
5987
5988   // Return the expanded shift op that should allow a rotate to be formed.
5989   EVT ShiftVT = OppShift.getOperand(1).getValueType();
5990   EVT ResVT = ExtractFrom.getValueType();
5991   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
5992   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
5993 }
5994
5995 // Return true if we can prove that, whenever Neg and Pos are both in the
5996 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
5997 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
5998 //
5999 //     (or (shift1 X, Neg), (shift2 X, Pos))
6000 //
6001 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6002 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6003 // to consider shift amounts with defined behavior.
6004 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6005                            SelectionDAG &DAG) {
6006   // If EltSize is a power of 2 then:
6007   //
6008   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6009   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6010   //
6011   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6012   // for the stronger condition:
6013   //
6014   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6015   //
6016   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6017   // we can just replace Neg with Neg' for the rest of the function.
6018   //
6019   // In other cases we check for the even stronger condition:
6020   //
6021   //     Neg == EltSize - Pos                                    [B]
6022   //
6023   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6024   // behavior if Pos == 0 (and consequently Neg == EltSize).
6025   //
6026   // We could actually use [A] whenever EltSize is a power of 2, but the
6027   // only extra cases that it would match are those uninteresting ones
6028   // where Neg and Pos are never in range at the same time.  E.g. for
6029   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6030   // as well as (sub 32, Pos), but:
6031   //
6032   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6033   //
6034   // always invokes undefined behavior for 32-bit X.
6035   //
6036   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6037   unsigned MaskLoBits = 0;
6038   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6039     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6040       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6041       unsigned Bits = Log2_64(EltSize);
6042       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6043           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6044         Neg = Neg.getOperand(0);
6045         MaskLoBits = Bits;
6046       }
6047     }
6048   }
6049
6050   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6051   if (Neg.getOpcode() != ISD::SUB)
6052     return false;
6053   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6054   if (!NegC)
6055     return false;
6056   SDValue NegOp1 = Neg.getOperand(1);
6057
6058   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6059   // Pos'.  The truncation is redundant for the purpose of the equality.
6060   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6061     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6062       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6063       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6064           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6065            MaskLoBits))
6066         Pos = Pos.getOperand(0);
6067     }
6068   }
6069
6070   // The condition we need is now:
6071   //
6072   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6073   //
6074   // If NegOp1 == Pos then we need:
6075   //
6076   //              EltSize & Mask == NegC & Mask
6077   //
6078   // (because "x & Mask" is a truncation and distributes through subtraction).
6079   APInt Width;
6080   if (Pos == NegOp1)
6081     Width = NegC->getAPIntValue();
6082
6083   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6084   // Then the condition we want to prove becomes:
6085   //
6086   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6087   //
6088   // which, again because "x & Mask" is a truncation, becomes:
6089   //
6090   //                NegC & Mask == (EltSize - PosC) & Mask
6091   //             EltSize & Mask == (NegC + PosC) & Mask
6092   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6093     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6094       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6095     else
6096       return false;
6097   } else
6098     return false;
6099
6100   // Now we just need to check that EltSize & Mask == Width & Mask.
6101   if (MaskLoBits)
6102     // EltSize & Mask is 0 since Mask is EltSize - 1.
6103     return Width.getLoBits(MaskLoBits) == 0;
6104   return Width == EltSize;
6105 }
6106
6107 // A subroutine of MatchRotate used once we have found an OR of two opposite
6108 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
6109 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
6110 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
6111 // Neg with outer conversions stripped away.
6112 SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6113                                        SDValue Neg, SDValue InnerPos,
6114                                        SDValue InnerNeg, unsigned PosOpcode,
6115                                        unsigned NegOpcode, const SDLoc &DL) {
6116   // fold (or (shl x, (*ext y)),
6117   //          (srl x, (*ext (sub 32, y)))) ->
6118   //   (rotl x, y) or (rotr x, (sub 32, y))
6119   //
6120   // fold (or (shl x, (*ext (sub 32, y))),
6121   //          (srl x, (*ext y))) ->
6122   //   (rotr x, y) or (rotl x, (sub 32, y))
6123   EVT VT = Shifted.getValueType();
6124   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
6125     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
6126     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
6127                        HasPos ? Pos : Neg);
6128   }
6129
6130   return SDValue();
6131 }
6132
6133 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
6134 // idioms for rotate, and if the target supports rotation instructions, generate
6135 // a rot[lr].
6136 SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6137   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
6138   EVT VT = LHS.getValueType();
6139   if (!TLI.isTypeLegal(VT))
6140     return SDValue();
6141
6142   // The target must have at least one rotate flavor.
6143   bool HasROTL = hasOperation(ISD::ROTL, VT);
6144   bool HasROTR = hasOperation(ISD::ROTR, VT);
6145   if (!HasROTL && !HasROTR)
6146     return SDValue();
6147
6148   // Check for truncated rotate.
6149   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6150       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6151     assert(LHS.getValueType() == RHS.getValueType());
6152     if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6153       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
6154     }
6155   }
6156
6157   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6158   SDValue LHSShift;   // The shift.
6159   SDValue LHSMask;    // AND value if any.
6160   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6161
6162   SDValue RHSShift;   // The shift.
6163   SDValue RHSMask;    // AND value if any.
6164   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6165
6166   // If neither side matched a rotate half, bail
6167   if (!LHSShift && !RHSShift)
6168     return SDValue();
6169
6170   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6171   // side of the rotate, so try to handle that here. In all cases we need to
6172   // pass the matched shift from the opposite side to compute the opcode and
6173   // needed shift amount to extract.  We still want to do this if both sides
6174   // matched a rotate half because one half may be a potential overshift that
6175   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6176   // single one).
6177
6178   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6179   if (LHSShift)
6180     if (SDValue NewRHSShift =
6181             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6182       RHSShift = NewRHSShift;
6183   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6184   if (RHSShift)
6185     if (SDValue NewLHSShift =
6186             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6187       LHSShift = NewLHSShift;
6188
6189   // If a side is still missing, nothing else we can do.
6190   if (!RHSShift || !LHSShift)
6191     return SDValue();
6192
6193   // At this point we've matched or extracted a shift op on each side.
6194
6195   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6196     return SDValue(); // Not shifting the same value.
6197
6198   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6199     return SDValue(); // Shifts must disagree.
6200
6201   // Canonicalize shl to left side in a shl/srl pair.
6202   if (RHSShift.getOpcode() == ISD::SHL) {
6203     std::swap(LHS, RHS);
6204     std::swap(LHSShift, RHSShift);
6205     std::swap(LHSMask, RHSMask);
6206   }
6207
6208   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6209   SDValue LHSShiftArg = LHSShift.getOperand(0);
6210   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6211   SDValue RHSShiftArg = RHSShift.getOperand(0);
6212   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6213
6214   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
6215   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
6216   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6217                                         ConstantSDNode *RHS) {
6218     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6219   };
6220   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6221     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6222                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6223
6224     // If there is an AND of either shifted operand, apply it to the result.
6225     if (LHSMask.getNode() || RHSMask.getNode()) {
6226       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6227       SDValue Mask = AllOnes;
6228
6229       if (LHSMask.getNode()) {
6230         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6231         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6232                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6233       }
6234       if (RHSMask.getNode()) {
6235         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6236         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6237                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6238       }
6239
6240       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6241     }
6242
6243     return Rot;
6244   }
6245
6246   // If there is a mask here, and we have a variable shift, we can't be sure
6247   // that we're masking out the right stuff.
6248   if (LHSMask.getNode() || RHSMask.getNode())
6249     return SDValue();
6250
6251   // If the shift amount is sign/zext/any-extended just peel it off.
6252   SDValue LExtOp0 = LHSShiftAmt;
6253   SDValue RExtOp0 = RHSShiftAmt;
6254   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6255        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6256        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6257        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6258       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6259        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6260        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6261        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6262     LExtOp0 = LHSShiftAmt.getOperand(0);
6263     RExtOp0 = RHSShiftAmt.getOperand(0);
6264   }
6265
6266   SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6267                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6268   if (TryL)
6269     return TryL;
6270
6271   SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6272                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6273   if (TryR)
6274     return TryR;
6275
6276   return SDValue();
6277 }
6278
6279 namespace {
6280
6281 /// Represents known origin of an individual byte in load combine pattern. The
6282 /// value of the byte is either constant zero or comes from memory.
6283 struct ByteProvider {
6284   // For constant zero providers Load is set to nullptr. For memory providers
6285   // Load represents the node which loads the byte from memory.
6286   // ByteOffset is the offset of the byte in the value produced by the load.
6287   LoadSDNode *Load = nullptr;
6288   unsigned ByteOffset = 0;
6289
6290   ByteProvider() = default;
6291
6292   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6293     return ByteProvider(Load, ByteOffset);
6294   }
6295
6296   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6297
6298   bool isConstantZero() const { return !Load; }
6299   bool isMemory() const { return Load; }
6300
6301   bool operator==(const ByteProvider &Other) const {
6302     return Other.Load == Load && Other.ByteOffset == ByteOffset;
6303   }
6304
6305 private:
6306   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6307       : Load(Load), ByteOffset(ByteOffset) {}
6308 };
6309
6310 } // end anonymous namespace
6311
6312 /// Recursively traverses the expression calculating the origin of the requested
6313 /// byte of the given value. Returns None if the provider can't be calculated.
6314 ///
6315 /// For all the values except the root of the expression verifies that the value
6316 /// has exactly one use and if it's not true return None. This way if the origin
6317 /// of the byte is returned it's guaranteed that the values which contribute to
6318 /// the byte are not used outside of this expression.
6319 ///
6320 /// Because the parts of the expression are not allowed to have more than one
6321 /// use this function iterates over trees, not DAGs. So it never visits the same
6322 /// node more than once.
6323 static const Optional<ByteProvider>
6324 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6325                       bool Root = false) {
6326   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6327   if (Depth == 10)
6328     return None;
6329
6330   if (!Root && !Op.hasOneUse())
6331     return None;
6332
6333   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6334   unsigned BitWidth = Op.getValueSizeInBits();
6335   if (BitWidth % 8 != 0)
6336     return None;
6337   unsigned ByteWidth = BitWidth / 8;
6338   assert(Index < ByteWidth && "invalid index requested");
6339   (void) ByteWidth;
6340
6341   switch (Op.getOpcode()) {
6342   case ISD::OR: {
6343     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6344     if (!LHS)
6345       return None;
6346     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6347     if (!RHS)
6348       return None;
6349
6350     if (LHS->isConstantZero())
6351       return RHS;
6352     if (RHS->isConstantZero())
6353       return LHS;
6354     return None;
6355   }
6356   case ISD::SHL: {
6357     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6358     if (!ShiftOp)
6359       return None;
6360
6361     uint64_t BitShift = ShiftOp->getZExtValue();
6362     if (BitShift % 8 != 0)
6363       return None;
6364     uint64_t ByteShift = BitShift / 8;
6365
6366     return Index < ByteShift
6367                ? ByteProvider::getConstantZero()
6368                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6369                                        Depth + 1);
6370   }
6371   case ISD::ANY_EXTEND:
6372   case ISD::SIGN_EXTEND:
6373   case ISD::ZERO_EXTEND: {
6374     SDValue NarrowOp = Op->getOperand(0);
6375     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6376     if (NarrowBitWidth % 8 != 0)
6377       return None;
6378     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6379
6380     if (Index >= NarrowByteWidth)
6381       return Op.getOpcode() == ISD::ZERO_EXTEND
6382                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6383                  : None;
6384     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6385   }
6386   case ISD::BSWAP:
6387     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6388                                  Depth + 1);
6389   case ISD::LOAD: {
6390     auto L = cast<LoadSDNode>(Op.getNode());
6391     if (!L->isSimple() || L->isIndexed())
6392       return None;
6393
6394     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6395     if (NarrowBitWidth % 8 != 0)
6396       return None;
6397     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6398
6399     if (Index >= NarrowByteWidth)
6400       return L->getExtensionType() == ISD::ZEXTLOAD
6401                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6402                  : None;
6403     return ByteProvider::getMemory(L, Index);
6404   }
6405   }
6406
6407   return None;
6408 }
6409
/// Little-endian counterpart of BigEndianByteAt: byte \p i of a \p BW byte
/// wide value maps to index \p i, independent of the width.
static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
  (void)BW; // The width does not influence the little-endian position.
  return i;
}
6413
/// Big-endian counterpart of LittleEndianByteAt: byte \p i of a \p BW byte
/// wide value maps to the mirrored index BW - 1 - i.
static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
  const unsigned LastByte = BW - 1;
  return LastByte - i;
}
6417
6418 // Check if the bytes offsets we are looking at match with either big or
6419 // little endian value loaded. Return true for big endian, false for little
6420 // endian, and None if match failed.
6421 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6422                                   int64_t FirstOffset) {
6423   // The endian can be decided only when it is 2 bytes at least.
6424   unsigned Width = ByteOffsets.size();
6425   if (Width < 2)
6426     return None;
6427
6428   bool BigEndian = true, LittleEndian = true;
6429   for (unsigned i = 0; i < Width; i++) {
6430     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6431     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6432     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6433     if (!BigEndian && !LittleEndian)
6434       return None;
6435   }
6436
6437   assert((BigEndian != LittleEndian) && "It should be either big endian or"
6438                                         "little endian");
6439   return BigEndian;
6440 }
6441
6442 static SDValue stripTruncAndExt(SDValue Value) {
6443   switch (Value.getOpcode()) {
6444   case ISD::TRUNCATE:
6445   case ISD::ZERO_EXTEND:
6446   case ISD::SIGN_EXTEND:
6447   case ISD::ANY_EXTEND:
6448     return stripTruncAndExt(Value.getOperand(0));
6449   }
6450   return Value;
6451 }
6452
6453 /// Match a pattern where a wide type scalar value is stored by several narrow
6454 /// stores. Fold it into a single store or a BSWAP and a store if the targets
6455 /// supports it.
6456 ///
6457 /// Assuming little endian target:
6458 ///  i8 *p = ...
6459 ///  i32 val = ...
6460 ///  p[0] = (val >> 0) & 0xFF;
6461 ///  p[1] = (val >> 8) & 0xFF;
6462 ///  p[2] = (val >> 16) & 0xFF;
6463 ///  p[3] = (val >> 24) & 0xFF;
6464 /// =>
6465 ///  *((i32)p) = val;
6466 ///
6467 ///  i8 *p = ...
6468 ///  i32 val = ...
6469 ///  p[0] = (val >> 24) & 0xFF;
6470 ///  p[1] = (val >> 16) & 0xFF;
6471 ///  p[2] = (val >> 8) & 0xFF;
6472 ///  p[3] = (val >> 0) & 0xFF;
6473 /// =>
6474 ///  *((i32)p) = BSWAP(val);
6475 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6476   // Collect all the stores in the chain.
6477   SDValue Chain;
6478   SmallVector<StoreSDNode *, 8> Stores;
6479   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6480     // TODO: Allow unordered atomics when wider type is legal (see D66309)
6481     if (Store->getMemoryVT() != MVT::i8 ||
6482         !Store->isSimple() || Store->isIndexed())
6483       return SDValue();
6484     Stores.push_back(Store);
6485     Chain = Store->getChain();
6486   }
6487   // Handle the simple type only.
6488   unsigned Width = Stores.size();
6489   EVT VT = EVT::getIntegerVT(
6490     *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6491   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6492     return SDValue();
6493
6494   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6495   if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6496     return SDValue();
6497
6498   // Check if all the bytes of the combined value we are looking at are stored
6499   // to the same base address. Collect bytes offsets from Base address into
6500   // ByteOffsets.
6501   SDValue CombinedValue;
6502   SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
6503   int64_t FirstOffset = INT64_MAX;
6504   StoreSDNode *FirstStore = nullptr;
6505   Optional<BaseIndexOffset> Base;
6506   for (auto Store : Stores) {
6507     // All the stores store different byte of the CombinedValue. A truncate is
6508     // required to get that byte value.
6509     SDValue Trunc = Store->getValue();
6510     if (Trunc.getOpcode() != ISD::TRUNCATE)
6511       return SDValue();
6512     // A shift operation is required to get the right byte offset, except the
6513     // first byte.
6514     int64_t Offset = 0;
6515     SDValue Value = Trunc.getOperand(0);
6516     if (Value.getOpcode() == ISD::SRL ||
6517         Value.getOpcode() == ISD::SRA) {
6518       ConstantSDNode *ShiftOffset =
6519         dyn_cast<ConstantSDNode>(Value.getOperand(1));
6520       // Trying to match the following pattern. The shift offset must be
6521       // a constant and a multiple of 8. It is the byte offset in "y".
6522       //
6523       // x = srl y, offset
6524       // i8 z = trunc x
6525       // store z, ...
6526       if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6527         return SDValue();
6528
6529      Offset = ShiftOffset->getSExtValue()/8;
6530      Value = Value.getOperand(0);
6531     }
6532
6533     // Stores must share the same combined value with different offsets.
6534     if (!CombinedValue)
6535       CombinedValue = Value;
6536     else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6537       return SDValue();
6538
6539     // The trunc and all the extend operation should be stripped to get the
6540     // real value we are stored.
6541     else if (CombinedValue.getValueType() != VT) {
6542       if (Value.getValueType() == VT ||
6543           Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6544         CombinedValue = Value;
6545       // Give up if the combined value type is smaller than the store size.
6546       if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6547         return SDValue();
6548     }
6549
6550     // Stores must share the same base address
6551     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6552     int64_t ByteOffsetFromBase = 0;
6553     if (!Base)
6554       Base = Ptr;
6555     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6556       return SDValue();
6557
6558     // Remember the first byte store
6559     if (ByteOffsetFromBase < FirstOffset) {
6560       FirstStore = Store;
6561       FirstOffset = ByteOffsetFromBase;
6562     }
6563     // Map the offset in the store and the offset in the combined value, and
6564     // early return if it has been set before.
6565     if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6566       return SDValue();
6567     ByteOffsets[Offset] = ByteOffsetFromBase;
6568   }
6569
6570   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6571   assert(FirstStore && "First store must be set");
6572
6573   // Check if the bytes of the combined value we are looking at match with
6574   // either big or little endian value store.
6575   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6576   if (!IsBigEndian.hasValue())
6577     return SDValue();
6578
6579   // The node we are looking at matches with the pattern, check if we can
6580   // replace it with a single bswap if needed and store.
6581
6582   // If the store needs byte swap check if the target supports it
6583   bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6584
6585   // Before legalize we can introduce illegal bswaps which will be later
6586   // converted to an explicit bswap sequence. This way we end up with a single
6587   // store and byte shuffling instead of several stores and byte shuffling.
6588   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6589     return SDValue();
6590
6591   // Check that a store of the wide type is both allowed and fast on the target
6592   bool Fast = false;
6593   bool Allowed =
6594       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6595                              *FirstStore->getMemOperand(), &Fast);
6596   if (!Allowed || !Fast)
6597     return SDValue();
6598
6599   if (VT != CombinedValue.getValueType()) {
6600     assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6601            "Get unexpected store value to combine");
6602     CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6603                              CombinedValue);
6604   }
6605
6606   if (NeedsBswap)
6607     CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6608
6609   SDValue NewStore =
6610     DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
6611                  FirstStore->getPointerInfo(), FirstStore->getAlignment());
6612
6613   // Rely on other DAG combine rules to remove the other individual stores.
6614   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6615   return NewStore;
6616 }
6617
6618 /// Match a pattern where a wide type scalar value is loaded by several narrow
6619 /// loads and combined by shifts and ors. Fold it into a single load or a load
6620 /// and a BSWAP if the targets supports it.
6621 ///
6622 /// Assuming little endian target:
6623 ///  i8 *a = ...
6624 ///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
6625 /// =>
6626 ///  i32 val = *((i32)a)
6627 ///
6628 ///  i8 *a = ...
6629 ///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
6630 /// =>
6631 ///  i32 val = BSWAP(*((i32)a))
6632 ///
6633 /// TODO: This rule matches complex patterns with OR node roots and doesn't
6634 /// interact well with the worklist mechanism. When a part of the pattern is
6635 /// updated (e.g. one of the loads) its direct users are put into the worklist,
6636 /// but the root node of the pattern which triggers the load combine is not
6637 /// necessarily a direct user of the changed node. For example, once the address
6638 /// of t28 load is reassociated load combine won't be triggered:
6639 ///             t25: i32 = add t4, Constant:i32<2>
6640 ///           t26: i64 = sign_extend t25
6641 ///        t27: i64 = add t2, t26
6642 ///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
6643 ///     t29: i32 = zero_extend t28
6644 ///   t32: i32 = shl t29, Constant:i8<8>
6645 /// t33: i32 = or t23, t32
6646 /// As a possible fix visitLoad can check if the load can be a part of a load
6647 /// combine pattern and add corresponding OR roots to the worklist.
6648 SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
6649   assert(N->getOpcode() == ISD::OR &&
6650          "Can only match load combining against OR nodes");
6651
6652   // Handles simple types only
6653   EVT VT = N->getValueType(0);
6654   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6655     return SDValue();
6656   unsigned ByteWidth = VT.getSizeInBits() / 8;
6657
6658   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6659   // Before legalize we can introduce too wide illegal loads which will be later
6660   // split into legal sized loads. This enables us to combine i64 load by i8
6661   // patterns to a couple of i32 loads on 32 bit targets.
6662   if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
6663     return SDValue();
6664
6665   bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
6666   auto MemoryByteOffset = [&] (ByteProvider P) {
6667     assert(P.isMemory() && "Must be a memory byte provider");
6668     unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
6669     assert(LoadBitWidth % 8 == 0 &&
6670            "can only analyze providers for individual bytes not bit");
6671     unsigned LoadByteWidth = LoadBitWidth / 8;
6672     return IsBigEndianTarget
6673             ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
6674             : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
6675   };
6676
6677   Optional<BaseIndexOffset> Base;
6678   SDValue Chain;
6679
6680   SmallPtrSet<LoadSDNode *, 8> Loads;
6681   Optional<ByteProvider> FirstByteProvider;
6682   int64_t FirstOffset = INT64_MAX;
6683
6684   // Check if all the bytes of the OR we are looking at are loaded from the same
6685   // base address. Collect bytes offsets from Base address in ByteOffsets.
6686   SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
6687   for (unsigned i = 0; i < ByteWidth; i++) {
6688     auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
6689     if (!P || !P->isMemory()) // All the bytes must be loaded from memory
6690       return SDValue();
6691
6692     LoadSDNode *L = P->Load;
6693     assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
6694            !L->isIndexed() &&
6695            "Must be enforced by calculateByteProvider");
6696     assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
6697
6698     // All loads must share the same chain
6699     SDValue LChain = L->getChain();
6700     if (!Chain)
6701       Chain = LChain;
6702     else if (Chain != LChain)
6703       return SDValue();
6704
6705     // Loads must share the same base address
6706     BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
6707     int64_t ByteOffsetFromBase = 0;
6708     if (!Base)
6709       Base = Ptr;
6710     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6711       return SDValue();
6712
6713     // Calculate the offset of the current byte from the base address
6714     ByteOffsetFromBase += MemoryByteOffset(*P);
6715     ByteOffsets[i] = ByteOffsetFromBase;
6716
6717     // Remember the first byte load
6718     if (ByteOffsetFromBase < FirstOffset) {
6719       FirstByteProvider = P;
6720       FirstOffset = ByteOffsetFromBase;
6721     }
6722
6723     Loads.insert(L);
6724   }
6725   assert(!Loads.empty() && "All the bytes of the value must be loaded from "
6726          "memory, so there must be at least one load which produces the value");
6727   assert(Base && "Base address of the accessed memory location must be set");
6728   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6729
6730   // Check if the bytes of the OR we are looking at match with either big or
6731   // little endian value load
6732   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6733   if (!IsBigEndian.hasValue())
6734     return SDValue();
6735
6736   assert(FirstByteProvider && "must be set");
6737
6738   // Ensure that the first byte is loaded from zero offset of the first load.
6739   // So the combined value can be loaded from the first load address.
6740   if (MemoryByteOffset(*FirstByteProvider) != 0)
6741     return SDValue();
6742   LoadSDNode *FirstLoad = FirstByteProvider->Load;
6743
6744   // The node we are looking at matches with the pattern, check if we can
6745   // replace it with a single load and bswap if needed.
6746
6747   // If the load needs byte swap check if the target supports it
6748   bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
6749
6750   // Before legalize we can introduce illegal bswaps which will be later
6751   // converted to an explicit bswap sequence. This way we end up with a single
6752   // load and byte shuffling instead of several loads and byte shuffling.
6753   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6754     return SDValue();
6755
6756   // Check that a load of the wide type is both allowed and fast on the target
6757   bool Fast = false;
6758   bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
6759                                         VT, *FirstLoad->getMemOperand(), &Fast);
6760   if (!Allowed || !Fast)
6761     return SDValue();
6762
6763   SDValue NewLoad =
6764       DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
6765                   FirstLoad->getPointerInfo(), FirstLoad->getAlignment());
6766
6767   // Transfer chain users from old loads to the new load.
6768   for (LoadSDNode *L : Loads)
6769     DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));
6770
6771   return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
6772 }
6773
6774 // If the target has andn, bsl, or a similar bit-select instruction,
6775 // we want to unfold masked merge, with canonical pattern of:
6776 //   |        A  |  |B|
6777 //   ((x ^ y) & m) ^ y
6778 //    |  D  |
6779 // Into:
6780 //   (x & m) | (y & ~m)
6781 // If y is a constant, and the 'andn' does not work with immediates,
6782 // we unfold into a different pattern:
6783 //   ~(~x & m) & (m | y)
6784 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6785 //       the very least that breaks andnpd / andnps patterns, and because those
6786 //       patterns are simplified in IR and shouldn't be created in the DAG
6787 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6788   assert(N->getOpcode() == ISD::XOR);

       SDValue Outer0 = N->getOperand(0);
       SDValue Outer1 = N->getOperand(1);
       EVT VT = N->getValueType(0);

       // A plain 'not' (xor with all-ones, i.e. y = -1) is never unfolded.
       if (isAllOnesOrAllOnesSplat(Outer1))
         return SDValue();

       // Pieces of the matched pattern ((X ^ Y) & M) ^ Y. They are written only
       // by a successful match.
       SDValue X, Y, M;

       // Try to read AndV as ((x ^ y) & m) with the inner xor at operand InnerIdx
       // of the 'and'. SharedY is the other operand of N and must also be one of
       // the inner xor's operands. Both the 'and' and the inner xor have to be
       // single-use.
       auto TryMatch = [&](SDValue AndV, unsigned InnerIdx, SDValue SharedY) {
         if (AndV.getOpcode() != ISD::AND || !AndV.hasOneUse())
           return false;

         SDValue Inner = AndV.getOperand(InnerIdx);
         if (Inner.getOpcode() != ISD::XOR || !Inner.hasOneUse())
           return false;

         SDValue A = Inner.getOperand(0);
         SDValue B = Inner.getOperand(1);
         // The inner xor must not be a 'not' either.
         if (isAllOnesOrAllOnesSplat(B))
           return false;

         // Orient the pair so that B is the operand shared with the outer xor.
         if (A == SharedY)
           std::swap(A, B);
         if (B != SharedY)
           return false;

         X = A;
         Y = B;
         M = AndV.getOperand(InnerIdx == 0 ? 1 : 0);
         return true;
       };

       // Three commutable operators give 8 variants of the pattern; the operand
       // swap inside TryMatch covers the inner xor, the four calls cover the rest.
       bool Matched = TryMatch(Outer0, 0, Outer1) || TryMatch(Outer0, 1, Outer1) ||
                      TryMatch(Outer1, 0, Outer0) || TryMatch(Outer1, 1, Outer0);
       if (!Matched)
         return SDValue();

       // A constant mask is not expected here (InstCombine should already have
       // unfolded it, and DAGCombiner probably shouldn't produce it); and without
       // an and-not for the mask there is nothing to gain.
       if (isa<ConstantSDNode>(M.getNode()) || !TLI.hasAndNot(M))
         return SDValue();

       SDLoc DL(N);

       if (TLI.hasAndNot(Y)) {
         // Regular form: (x & m) | (y & ~m)
         SDValue MaskedX = DAG.getNode(ISD::AND, DL, VT, X, M);
         SDValue InvMask = DAG.getNOT(DL, M, VT);
         SDValue MaskedY = DAG.getNode(ISD::AND, DL, VT, Y, InvMask);
         return DAG.getNode(ISD::OR, DL, VT, MaskedX, MaskedY);
       }

       // Y is a constant that 'andn' cannot take as an immediate, so use the
       // alternative form that still allows andn: ~(~x & m) & (m | y)
       assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
       SDValue InvX = DAG.getNOT(DL, X, VT);
       SDValue InvXMasked = DAG.getNode(ISD::AND, DL, VT, InvX, M);
       SDValue Keep = DAG.getNOT(DL, InvXMasked, VT);
       SDValue MaskOrY = DAG.getNode(ISD::OR, DL, VT, M, Y);
       return DAG.getNode(ISD::AND, DL, VT, Keep, MaskOrY);
6854 }
6855
6856 SDValue DAGCombiner::visitXOR(SDNode *N) {
       // Combine hook for an XOR node N. The folds below are tried in order and
       // the first one that applies returns the replacement value. If none
       // applies, the result is SDValue(N, 0) when SimplifyDemandedBits returns
       // true, and an empty SDValue otherwise.
6857   SDValue N0 = N->getOperand(0);
6858   SDValue N1 = N->getOperand(1);
6859   EVT VT = N0.getValueType();
6860
6861   // fold vector ops
6862   if (VT.isVector()) {
6863     if (SDValue FoldedVOp = SimplifyVBinOp(N))
6864       return FoldedVOp;
6865
6866     // fold (xor x, 0) -> x, vector edition
6867     if (ISD::isBuildVectorAllZeros(N0.getNode()))
6868       return N1;
6869     if (ISD::isBuildVectorAllZeros(N1.getNode()))
6870       return N0;
6871   }
6872
6873   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
6874   SDLoc DL(N);
6875   if (N0.isUndef() && N1.isUndef())
6876     return DAG.getConstant(0, DL, VT);
6877   // fold (xor x, undef) -> undef
6878   if (N0.isUndef())
6879     return N0;
6880   if (N1.isUndef())
6881     return N1;
6882   // fold (xor c1, c2) -> c1^c2
6883   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
6884   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
6885   if (N0C && N1C)
6886     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
6887   // canonicalize constant to RHS
6888   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
6889      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
6890     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
6891   // fold (xor x, 0) -> x
6892   if (isNullConstant(N1))
6893     return N0;
6894
6895   if (SDValue NewSel = foldBinOpIntoSelect(N))
6896     return NewSel;
6897
6898   // reassociate xor
6899   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
6900     return RXOR;
6901
6902   // fold !(x cc y) -> (x !cc y)
       // Applies when N1 is the target's 'true' constant and N0 is a SETCC or
       // SELECT_CC; after operation legalization the inverted condition code
       // must additionally be legal for the compared type.
6903   unsigned N0Opcode = N0.getOpcode();
6904   SDValue LHS, RHS, CC;
6905   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
6906     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
6907                                                LHS.getValueType().isInteger());
6908     if (!LegalOperations ||
6909         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
6910       switch (N0Opcode) {
6911       default:
6912         llvm_unreachable("Unhandled SetCC Equivalent!");
6913       case ISD::SETCC:
6914         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
6915       case ISD::SELECT_CC:
6916         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
6917                                N0.getOperand(3), NotCC);
6918       }
6919     }
6920   }
6921
6922   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
       // The 'not' here is an xor with 1, and the zext must be single-use. The
       // new inner xor is queued so it can be combined in turn.
6923   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
6924       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
6925     SDValue V = N0.getOperand(0);
6926     SDLoc DL0(N0);
6927     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
6928                     DAG.getConstant(1, DL0, V.getValueType()));
6929     AddToWorklist(V.getNode());
6930     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
6931   }
6932
6933   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
       // The dual is handled as well: (not (and x, y)) -> (or (not x), (not y)).
       // Restricted to i1 values, with the 'not' expressed as xor with 1.
6934   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
6935       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6936     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
6937     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
6938       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6939       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
6940       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
6941       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
6942       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
6943     }
6944   }
6945   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
       // Same transform (including the and -> or dual) for any type, with the
       // 'not' expressed as xor with all-ones.
6946   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
6947       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
6948     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
6949     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
6950       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
6951       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
6952       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
6953       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
6954       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
6955     }
6956   }
6957
6958   // fold (not (neg x)) -> (add X, -1)
6959   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
6960   // Y is a constant or the subtract has a single use.
6961   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
6962       isNullConstant(N0.getOperand(0))) {
6963     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
6964                        DAG.getAllOnesConstant(DL, VT));
6965   }
6966
6967   // fold (xor (and x, y), y) -> (and (not x), y)
       // Only the form with y as operand 1 of the single-use 'and' is matched.
6968   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
6969     SDValue X = N0.getOperand(0);
6970     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
6971     AddToWorklist(NotX.getNode());
6972     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
6973   }
6974
       // fold (xor (shl/srl X, ShiftC), XorC) -> (shl/srl (not X), ShiftC) when
       // XorC equals all-ones shifted by ShiftC in the same direction. Only done
       // for a single-use shift with constant (or constant-splat) operands.
6975   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
6976     ConstantSDNode *XorC = isConstOrConstSplat(N1);
6977     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
6978     unsigned BitWidth = VT.getScalarSizeInBits();
6979     if (XorC && ShiftC) {
6980       // Don't crash on an oversized shift. We can not guarantee that a bogus
6981       // shift has been simplified to undef.
6982       uint64_t ShiftAmt = ShiftC->getLimitedValue();
6983       if (ShiftAmt < BitWidth) {
6984         APInt Ones = APInt::getAllOnesValue(BitWidth);
6985         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
6986         if (XorC->getAPIntValue() == Ones) {
6987           // If the xor constant is a shifted -1, do a 'not' before the shift:
6988           // xor (X << ShiftC), XorC --> (not X) << ShiftC
6989           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
6990           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
6991           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
6992         }
6993       }
6994     }
6995   }
6996
6997   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
       // A is whichever xor operand is the ADD and S whichever is the SRA, so
       // both operand orders of the xor (and of the add) are covered.
6998   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
6999     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7000     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7001     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7002       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7003       SDValue S0 = S.getOperand(0);
7004       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7005         unsigned OpSizeInBits = VT.getScalarSizeInBits();
7006         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7007           if (C->getAPIntValue() == (OpSizeInBits - 1))
7008             return DAG.getNode(ISD::ABS, DL, VT, S0);
7009       }
7010     }
7011   }
7012
7013   // fold (xor x, x) -> 0
7014   if (N0 == N1)
7015     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7016
7017   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7018   // Here is a concrete example of this equivalence:
7019   // i16   x ==  14
7020   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7021   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7022   //
7023   // =>
7024   //
7025   // i16     ~1      == 0b1111111111111110
7026   // i16 rol(~1, 14) == 0b1011111111111111
7027   //
7028   // Some additional tips to help conceptualize this transform:
7029   // - Try to see the operation as placing a single zero in a value of all ones.
7030   // - There exists no value for x which would allow the result to contain zero.
7031   // - Values of x larger than the bitwidth are undefined and do not require a
7032   //   consistent result.
7033   // - Pushing the zero left requires shifting one bits in from the right.
7034   // A rotate left of ~1 is a nice way of achieving the desired result.
7035   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7036       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7037     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7038                        N0.getOperand(1));
7039   }
7040
7041   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7042   if (N0Opcode == N1.getOpcode())
7043     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7044       return V;
7045
7046   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7047   if (SDValue MM = unfoldMaskedMerge(N))
7048     return MM;
7049
7050   // Simplify the expression using non-local knowledge.
7051   if (SimplifyDemandedBits(SDValue(N, 0)))
7052     return SDValue(N, 0);
7053
7054   return SDValue();
7055 }
7056
7057 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7058 /// shift-by-constant operand with identical opcode, we may be able to convert
7059 /// that into 2 independent shifts followed by the logic op. This is a
7060 /// throughput improvement.
7061 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
       // The value being shifted has to be a single-use and/or/xor.
       SDValue Logic = Shift->getOperand(0);
       if (!Logic.hasOneUse())
         return SDValue();

       const unsigned LogicOpcode = Logic.getOpcode();
       const bool IsBitwiseLogic = LogicOpcode == ISD::AND ||
                                   LogicOpcode == ISD::OR ||
                                   LogicOpcode == ISD::XOR;
       if (!IsBitwiseLogic)
         return SDValue();

       // The outer shift amount; callers guarantee it is a constant or splat.
       const unsigned ShiftOpcode = Shift->getOpcode();
       SDValue OuterAmt = Shift->getOperand(1);
       ConstantSDNode *OuterAmtC = isConstOrConstSplat(OuterAmt);
       assert(OuterAmtC && "Expected a shift with constant operand");
       const APInt &OuterAmtVal = OuterAmtC->getAPIntValue();

       // Recognise V as a single-use shift with the same opcode as the outer
       // shift and a constant amount. On a match, ShiftedVal receives the shifted
       // operand and InnerAmtVal points at the inner amount. Both outputs may
       // already have been written when a late check fails.
       auto MatchInnerShift = [&](SDValue V, SDValue &ShiftedVal,
                                  const APInt *&InnerAmtVal) {
         if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
           return false;

         ConstantSDNode *InnerAmtC = isConstOrConstSplat(V.getOperand(1));
         if (!InnerAmtC)
           return false;

         ShiftedVal = V.getOperand(0);
         InnerAmtVal = &InnerAmtC->getAPIntValue();

         // Shift amount types need not match their operand type, so the two
         // constants must have equal width before they can be added; the sum
         // must then stay below the scalar bit width for the fold to be valid.
         return InnerAmtVal->getBitWidth() == OuterAmtVal.getBitWidth() &&
                (*InnerAmtVal + OuterAmtVal).ult(V.getScalarValueSizeInBits());
       };

       // Logic ops commute: look for the inner shift at operand 0 first, then at
       // operand 1. The remaining operand becomes Y.
       SDValue X;
       const APInt *InnerAmtVal;
       unsigned MatchIdx = 0;
       for (; MatchIdx != 2; ++MatchIdx)
         if (MatchInnerShift(Logic.getOperand(MatchIdx), X, InnerAmtVal))
           break;
       if (MatchIdx == 2)
         return SDValue();
       SDValue Y = Logic.getOperand(1 - MatchIdx);

       // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
       SDLoc DL(Shift);
       EVT VT = Shift->getValueType(0);
       EVT ShiftAmtVT = OuterAmt.getValueType();
       SDValue SumAmt = DAG.getConstant(*InnerAmtVal + OuterAmtVal, DL, ShiftAmtVT);
       SDValue ShiftedX = DAG.getNode(ShiftOpcode, DL, VT, X, SumAmt);
       SDValue ShiftedY = DAG.getNode(ShiftOpcode, DL, VT, Y, OuterAmt);
       return DAG.getNode(LogicOpcode, DL, VT, ShiftedX, ShiftedY);
7121 }
7122
7123 /// Handle transforms common to the three shifts, when the shift amount is a
7124 /// constant.
7125 /// We are looking for: (shift being one of shl/sra/srl)
7126 ///   shift (binop X, C0), C1
7127 /// And want to transform into:
7128 ///   binop (shift X, C1), (shift C0, C1)
7129 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7130   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");

       SDValue BinOp = N->getOperand(0);
       SDValue ShAmt = N->getOperand(1);
       const unsigned ShiftOpcode = N->getOpcode();

       // Leave a 'not' alone so it does not become a regular xor.
       if (isBitwiseNot(BinOp))
         return SDValue();

       // The inner binop is going to be replaced, so it must be one-use, and the
       // target has to consider commuting it with the shift desirable.
       if (!BinOp.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
         return SDValue();

       // TODO: This is limited to early combining because it may reveal
       //       regressions otherwise. But since a target hook was just checked
       //       to see if this is desirable, that should have filtered out cases
       //       where this interferes with some other pattern matching.
       if (!LegalTypes) {
         SDValue R = combineShiftOfShiftedLogic(N, DAG);
         if (R)
           return R;
       }

       // Some binops are pulled through shifts, giving (and (shift)) instead of
       // (shift (and)), likewise for add, or, xor. This sort of thing happens
       // with address calculations, so it's important to canonicalize it.
       // Bitwise logic qualifies for every shift; add only under shl (not under
       // sra/srl).
       const unsigned BinOpcode = BinOp.getOpcode();
       const bool IsBitwiseLogic = BinOpcode == ISD::OR || BinOpcode == ISD::XOR ||
                                   BinOpcode == ISD::AND;
       const bool IsShlOfAdd = BinOpcode == ISD::ADD && ShiftOpcode == ISD::SHL;
       if (!IsBitwiseLogic && !IsShlOfAdd)
         return SDValue();

       // The RHS of the binop has to be a non-opaque constant.
       if (!getAsNonOpaqueConstant(BinOp.getOperand(1)))
         return SDValue();

       // FIXME: disable this unless the input to the binop is a shift by a
       // constant or is copy/select. Enable this in other cases when figure out
       // it's exactly profitable.
       SDValue Src = BinOp.getOperand(0);
       bool SrcIsShiftByConstant = false;
       bool SrcIsCopyOrSelect = false;
       switch (Src.getOpcode()) {
       case ISD::SHL:
       case ISD::SRA:
       case ISD::SRL:
         SrcIsShiftByConstant = isa<ConstantSDNode>(Src.getOperand(1));
         break;
       case ISD::CopyFromReg:
       case ISD::SELECT:
         SrcIsCopyOrSelect = true;
         break;
       default:
         break;
       }
       if (!SrcIsShiftByConstant && !SrcIsCopyOrSelect)
         return SDValue();

       // For a copy/select source the transform is skipped when the shift itself
       // has a single use.
       if (SrcIsCopyOrSelect && N->hasOneUse())
         return SDValue();

       // Fold the constants, shifting the binop RHS by the shift amount, then
       // shift the binop LHS and re-apply the binop on top.
       SDLoc DL(N);
       EVT VT = N->getValueType(0);
       SDValue ShiftedC =
           DAG.getNode(ShiftOpcode, DL, VT, BinOp.getOperand(1), ShAmt);
       assert(isa<ConstantSDNode>(ShiftedC) && "Folding was not successful!");

       SDValue ShiftedX = DAG.getNode(ShiftOpcode, DL, VT, Src, ShAmt);
       return DAG.getNode(BinOpcode, DL, VT, ShiftedX, ShiftedC);
7198 }
7199
7200 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
       // Rewrites
       //   (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
       // and returns the new 'and'. Returns an empty SDValue when the truncate or
       // the 'and' has more than one use, when the target does not find TruncVT
       // desirable for AND, or when the mask is not a non-opaque constant (or
       // constant vector).
7201   assert(N->getOpcode() == ISD::TRUNCATE);
7202   assert(N->getOperand(0).getOpcode() == ISD::AND);

       SDValue AndOp = N->getOperand(0);
       EVT TruncVT = N->getValueType(0);

       if (!N->hasOneUse() || !AndOp.hasOneUse() ||
           !TLI.isTypeDesirableForOp(ISD::AND, TruncVT))
         return SDValue();

       SDValue Mask = AndOp.getOperand(1);
       if (!isConstantOrConstantVector(Mask, /* NoOpaques */ true))
         return SDValue();

       // Truncate both operands separately and queue the new nodes so they are
       // visited by the combiner as well.
       SDLoc DL(N);
       SDValue NarrowVal =
           DAG.getNode(ISD::TRUNCATE, DL, TruncVT, AndOp.getOperand(0));
       SDValue NarrowMask = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mask);
       AddToWorklist(NarrowVal.getNode());
       AddToWorklist(NarrowMask.getNode());
       return DAG.getNode(ISD::AND, DL, TruncVT, NarrowVal, NarrowMask);
7221 }
7222
7223 SDValue DAGCombiner::visitRotate(SDNode *N) {
       // Combine hook for a rotate node N. Folds that build a new rotate reuse
       // N's own opcode, so the same code serves both rotate directions. Returns
       // the replacement value, or an empty SDValue when no fold applies.
7224   SDLoc dl(N);
7225   SDValue N0 = N->getOperand(0);
7226   SDValue N1 = N->getOperand(1);
7227   EVT VT = N->getValueType(0);
7228   unsigned Bitsize = VT.getScalarSizeInBits();
7229
7230   // fold (rot x, 0) -> x
7231   if (isNullOrNullSplat(N1))
7232     return N0;
7233
7234   // fold (rot x, c) -> x iff (c % BitSize) == 0
       // Only attempted for power-of-two bit widths, where the condition is the
       // same as the low log2(BitSize) bits of the amount being known zero.
7235   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7236     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7237     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7238       return N0;
7239   }
7240
7241   // fold (rot x, c) -> (rot x, c % BitSize)
7242   // TODO - support non-uniform vector amounts.
7243   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
7244     if (Cst->getAPIntValue().uge(Bitsize)) {
7245       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
7246       return DAG.getNode(N->getOpcode(), dl, VT, N0,
7247                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
7248     }
7249   }
7250
7251   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7252   if (N1.getOpcode() == ISD::TRUNCATE &&
7253       N1.getOperand(0).getOpcode() == ISD::AND) {
7254     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7255       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7256   }
7257
7258   unsigned NextOp = N0.getOpcode();
       // fold (rot* (rot* x, c2), c1)
       //   -> (rot* x, ((c1 % bitsize) +- (c2 % bitsize) + bitsize) % bitsize)
       // Amounts are added when both rotates go the same way and subtracted
       // otherwise. All arithmetic is unsigned: a signed remainder of a negative
       // difference would only denote the right rotate amount when bitsize is a
       // power of two.
7260   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7261     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7262     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7263     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7264       EVT ShiftVT = C1->getValueType(0);
7265       bool SameSide = (N->getOpcode() == NextOp);
7266       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
           SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
           // Bring both amounts into [0, bitsize) before combining them.
           SDValue Norm1 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, C1,
                                                      BitsizeC.getNode());
           SDValue Norm2 = DAG.FoldConstantArithmetic(ISD::UREM, dl, ShiftVT, C2,
                                                      BitsizeC.getNode());
           if (Norm1 && Norm2) {
             if (SDValue CombinedShift = DAG.FoldConstantArithmetic(
                     CombineOp, dl, ShiftVT, Norm1.getNode(), Norm2.getNode())) {
               // Bias by bitsize so that a subtraction cannot leave a negative
               // amount, then reduce once more. Every fold result is checked so
               // a failed constant fold leaves N untouched instead of building
               // a rotate with a null operand.
               if (SDValue Biased = DAG.FoldConstantArithmetic(
                       ISD::ADD, dl, ShiftVT, CombinedShift.getNode(),
                       BitsizeC.getNode())) {
                 if (SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
                         ISD::UREM, dl, ShiftVT, Biased.getNode(),
                         BitsizeC.getNode()))
                   return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
                                      CombinedShiftNorm);
               }
             }
           }
7276     }
7277   }
7278   return SDValue();
7279 }
7280
7281 SDValue DAGCombiner::visitSHL(SDNode *N) {
7282   SDValue N0 = N->getOperand(0);
7283   SDValue N1 = N->getOperand(1);
7284   if (SDValue V = DAG.simplifyShift(N0, N1))
7285     return V;
7286
7287   EVT VT = N0.getValueType();
7288   EVT ShiftVT = N1.getValueType();
7289   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7290
7291   // fold vector ops
7292   if (VT.isVector()) {
7293     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7294       return FoldedVOp;
7295
7296     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7297     // If setcc produces all-one true value then:
7298     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7299     if (N1CV && N1CV->isConstant()) {
7300       if (N0.getOpcode() == ISD::AND) {
7301         SDValue N00 = N0->getOperand(0);
7302         SDValue N01 = N0->getOperand(1);
7303         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7304
7305         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7306             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7307                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
7308           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
7309                                                      N01CV, N1CV))
7310             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7311         }
7312       }
7313     }
7314   }
7315
7316   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7317
7318   // fold (shl c1, c2) -> c1<<c2
7319   // TODO - support non-uniform vector shift amounts.
7320   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7321   if (N0C && N1C && !N1C->isOpaque())
7322     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
7323
7324   if (SDValue NewSel = foldBinOpIntoSelect(N))
7325     return NewSel;
7326
7327   // if (shl x, c) is known to be zero, return 0
7328   if (DAG.MaskedValueIsZero(SDValue(N, 0),
7329                             APInt::getAllOnesValue(OpSizeInBits)))
7330     return DAG.getConstant(0, SDLoc(N), VT);
7331
7332   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7333   if (N1.getOpcode() == ISD::TRUNCATE &&
7334       N1.getOperand(0).getOpcode() == ISD::AND) {
7335     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7336       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7337   }
7338
7339   // TODO - support non-uniform vector shift amounts.
7340   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7341     return SDValue(N, 0);
7342
7343   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7344   if (N0.getOpcode() == ISD::SHL) {
7345     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7346                                           ConstantSDNode *RHS) {
7347       APInt c1 = LHS->getAPIntValue();
7348       APInt c2 = RHS->getAPIntValue();
7349       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7350       return (c1 + c2).uge(OpSizeInBits);
7351     };
7352     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7353       return DAG.getConstant(0, SDLoc(N), VT);
7354
7355     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7356                                        ConstantSDNode *RHS) {
7357       APInt c1 = LHS->getAPIntValue();
7358       APInt c2 = RHS->getAPIntValue();
7359       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7360       return (c1 + c2).ult(OpSizeInBits);
7361     };
7362     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7363       SDLoc DL(N);
7364       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7365       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7366     }
7367   }
7368
7369   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7370   // For this to be valid, the second form must not preserve any of the bits
7371   // that are shifted out by the inner shift in the first form.  This means
7372   // the outer shift size must be >= the number of bits added by the ext.
7373   // As a corollary, we don't care what kind of ext it is.
7374   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7375        N0.getOpcode() == ISD::ANY_EXTEND ||
7376        N0.getOpcode() == ISD::SIGN_EXTEND) &&
7377       N0.getOperand(0).getOpcode() == ISD::SHL) {
7378     SDValue N0Op0 = N0.getOperand(0);
7379     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7380     EVT InnerVT = N0Op0.getValueType();
7381     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7382
7383     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7384                                                          ConstantSDNode *RHS) {
7385       APInt c1 = LHS->getAPIntValue();
7386       APInt c2 = RHS->getAPIntValue();
7387       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7388       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7389              (c1 + c2).uge(OpSizeInBits);
7390     };
7391     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7392                                   /*AllowUndefs*/ false,
7393                                   /*AllowTypeMismatch*/ true))
7394       return DAG.getConstant(0, SDLoc(N), VT);
7395
7396     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7397                                                       ConstantSDNode *RHS) {
7398       APInt c1 = LHS->getAPIntValue();
7399       APInt c2 = RHS->getAPIntValue();
7400       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7401       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7402              (c1 + c2).ult(OpSizeInBits);
7403     };
7404     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7405                                   /*AllowUndefs*/ false,
7406                                   /*AllowTypeMismatch*/ true)) {
7407       SDLoc DL(N);
7408       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
7409       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7410       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7411       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7412     }
7413   }
7414
7415   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7416   // Only fold this if the inner zext has no other uses to avoid increasing
7417   // the total number of instructions.
7418   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7419       N0.getOperand(0).getOpcode() == ISD::SRL) {
7420     SDValue N0Op0 = N0.getOperand(0);
7421     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7422
7423     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7424       APInt c1 = LHS->getAPIntValue();
7425       APInt c2 = RHS->getAPIntValue();
7426       zeroExtendToMatch(c1, c2);
7427       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7428     };
7429     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7430                                   /*AllowUndefs*/ false,
7431                                   /*AllowTypeMismatch*/ true)) {
7432       SDLoc DL(N);
7433       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7434       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7435       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7436       AddToWorklist(NewSHL.getNode());
7437       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7438     }
7439   }
7440
7441   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
7442   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
7443   // TODO - support non-uniform vector shift amounts.
7444   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7445       N0->getFlags().hasExact()) {
7446     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7447       uint64_t C1 = N0C1->getZExtValue();
7448       uint64_t C2 = N1C->getZExtValue();
7449       SDLoc DL(N);
7450       if (C1 <= C2)
7451         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7452                            DAG.getConstant(C2 - C1, DL, ShiftVT));
7453       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7454                          DAG.getConstant(C1 - C2, DL, ShiftVT));
7455     }
7456   }
7457
7458   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
7459   //                               (and (srl x, (sub c1, c2), MASK)
7460   // Only fold this if the inner shift has no other uses -- if it does, folding
7461   // this will increase the total number of instructions.
7462   // TODO - drop hasOneUse requirement if c1 == c2?
7463   // TODO - support non-uniform vector shift amounts.
7464   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7465       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7466     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7467       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7468         uint64_t c1 = N0C1->getZExtValue();
7469         uint64_t c2 = N1C->getZExtValue();
7470         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7471         SDValue Shift;
7472         if (c2 > c1) {
7473           Mask <<= c2 - c1;
7474           SDLoc DL(N);
7475           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7476                               DAG.getConstant(c2 - c1, DL, ShiftVT));
7477         } else {
7478           Mask.lshrInPlace(c1 - c2);
7479           SDLoc DL(N);
7480           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7481                               DAG.getConstant(c1 - c2, DL, ShiftVT));
7482         }
7483         SDLoc DL(N0);
7484         return DAG.getNode(ISD::AND, DL, VT, Shift,
7485                            DAG.getConstant(Mask, DL, VT));
7486       }
7487     }
7488   }
7489
7490   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7491   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7492       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7493     SDLoc DL(N);
7494     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7495     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7496     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7497   }
7498
7499   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7500   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7501   // Variant of version done on multiply, except mul by a power of 2 is turned
7502   // into a shift.
7503   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7504       N0.getNode()->hasOneUse() &&
7505       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7506       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7507       TLI.isDesirableToCommuteWithShift(N, Level)) {
7508     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7509     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7510     AddToWorklist(Shl0.getNode());
7511     AddToWorklist(Shl1.getNode());
7512     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7513   }
7514
7515   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7516   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7517       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7518       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7519     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7520     if (isConstantOrConstantVector(Shl))
7521       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7522   }
7523
7524   if (N1C && !N1C->isOpaque())
7525     if (SDValue NewSHL = visitShiftByConstant(N))
7526       return NewSHL;
7527
7528   return SDValue();
7529 }
7530
7531 SDValue DAGCombiner::visitSRA(SDNode *N) {
7532   SDValue N0 = N->getOperand(0);
7533   SDValue N1 = N->getOperand(1);
7534   if (SDValue V = DAG.simplifyShift(N0, N1))
7535     return V;
7536
7537   EVT VT = N0.getValueType();
7538   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7539
7540   // Arithmetic shifting an all-sign-bit value is a no-op.
7541   // fold (sra 0, x) -> 0
7542   // fold (sra -1, x) -> -1
7543   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7544     return N0;
7545
7546   // fold vector ops
7547   if (VT.isVector())
7548     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7549       return FoldedVOp;
7550
7551   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7552
7553   // fold (sra c1, c2) -> (sra c1, c2)
7554   // TODO - support non-uniform vector shift amounts.
7555   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7556   if (N0C && N1C && !N1C->isOpaque())
7557     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7558
7559   if (SDValue NewSel = foldBinOpIntoSelect(N))
7560     return NewSel;
7561
7562   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
7563   // sext_inreg.
7564   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
7565     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7566     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7567     if (VT.isVector())
7568       ExtVT = EVT::getVectorVT(*DAG.getContext(),
7569                                ExtVT, VT.getVectorNumElements());
7570     if ((!LegalOperations ||
7571          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7572       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7573                          N0.getOperand(0), DAG.getValueType(ExtVT));
7574   }
7575
7576   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7577   // clamp (add c1, c2) to max shift.
7578   if (N0.getOpcode() == ISD::SRA) {
7579     SDLoc DL(N);
7580     EVT ShiftVT = N1.getValueType();
7581     EVT ShiftSVT = ShiftVT.getScalarType();
7582     SmallVector<SDValue, 16> ShiftValues;
7583
7584     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7585       APInt c1 = LHS->getAPIntValue();
7586       APInt c2 = RHS->getAPIntValue();
7587       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7588       APInt Sum = c1 + c2;
7589       unsigned ShiftSum =
7590           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7591       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7592       return true;
7593     };
7594     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7595       SDValue ShiftValue;
7596       if (VT.isVector())
7597         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7598       else
7599         ShiftValue = ShiftValues[0];
7600       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7601     }
7602   }
7603
7604   // fold (sra (shl X, m), (sub result_size, n))
7605   // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
7606   // result_size - n != m.
7607   // If truncate is free for the target sext(shl) is likely to result in better
7608   // code.
7609   if (N0.getOpcode() == ISD::SHL && N1C) {
7610     // Get the two constanst of the shifts, CN0 = m, CN = n.
7611     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7612     if (N01C) {
7613       LLVMContext &Ctx = *DAG.getContext();
7614       // Determine what the truncate's result bitsize and type would be.
7615       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7616
7617       if (VT.isVector())
7618         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7619
7620       // Determine the residual right-shift amount.
7621       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7622
7623       // If the shift is not a no-op (in which case this should be just a sign
7624       // extend already), the truncated to type is legal, sign_extend is legal
7625       // on that type, and the truncate to that type is both legal and free,
7626       // perform the transform.
7627       if ((ShiftAmt > 0) &&
7628           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7629           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7630           TLI.isTruncateFree(VT, TruncVT)) {
7631         SDLoc DL(N);
7632         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7633             getShiftAmountTy(N0.getOperand(0).getValueType()));
7634         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7635                                     N0.getOperand(0), Amt);
7636         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7637                                     Shift);
7638         return DAG.getNode(ISD::SIGN_EXTEND, DL,
7639                            N->getValueType(0), Trunc);
7640       }
7641     }
7642   }
7643
7644   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
7645   //   sra (add (shl X, N1C), AddC), N1C -->
7646   //   sext (add (trunc X to (width - N1C)), AddC')
7647   if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
7648       N0.getOperand(0).getOpcode() == ISD::SHL &&
7649       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
7650     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
7651       SDValue Shl = N0.getOperand(0);
7652       // Determine what the truncate's type would be and ask the target if that
7653       // is a free operation.
7654       LLVMContext &Ctx = *DAG.getContext();
7655       unsigned ShiftAmt = N1C->getZExtValue();
7656       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
7657       if (VT.isVector())
7658         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7659
7660       // TODO: The simple type check probably belongs in the default hook
7661       //       implementation and/or target-specific overrides (because
7662       //       non-simple types likely require masking when legalized), but that
7663       //       restriction may conflict with other transforms.
7664       if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
7665         SDLoc DL(N);
7666         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
7667         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
7668                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
7669         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
7670         return DAG.getSExtOrTrunc(Add, DL, VT);
7671       }
7672     }
7673   }
7674
7675   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7676   if (N1.getOpcode() == ISD::TRUNCATE &&
7677       N1.getOperand(0).getOpcode() == ISD::AND) {
7678     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7679       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7680   }
7681
7682   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7683   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7684   //      if c1 is equal to the number of bits the trunc removes
7685   // TODO - support non-uniform vector shift amounts.
7686   if (N0.getOpcode() == ISD::TRUNCATE &&
7687       (N0.getOperand(0).getOpcode() == ISD::SRL ||
7688        N0.getOperand(0).getOpcode() == ISD::SRA) &&
7689       N0.getOperand(0).hasOneUse() &&
7690       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7691     SDValue N0Op0 = N0.getOperand(0);
7692     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7693       EVT LargeVT = N0Op0.getValueType();
7694       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7695       if (LargeShift->getAPIntValue() == TruncBits) {
7696         SDLoc DL(N);
7697         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7698                                       getShiftAmountTy(LargeVT));
7699         SDValue SRA =
7700             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7701         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7702       }
7703     }
7704   }
7705
7706   // Simplify, based on bits shifted out of the LHS.
7707   // TODO - support non-uniform vector shift amounts.
7708   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7709     return SDValue(N, 0);
7710
7711   // If the sign bit is known to be zero, switch this to a SRL.
7712   if (DAG.SignBitIsZero(N0))
7713     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7714
7715   if (N1C && !N1C->isOpaque())
7716     if (SDValue NewSRA = visitShiftByConstant(N))
7717       return NewSRA;
7718
7719   return SDValue();
7720 }
7721
7722 SDValue DAGCombiner::visitSRL(SDNode *N) {
7723   SDValue N0 = N->getOperand(0);
7724   SDValue N1 = N->getOperand(1);
7725   if (SDValue V = DAG.simplifyShift(N0, N1))
7726     return V;
7727
7728   EVT VT = N0.getValueType();
7729   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7730
7731   // fold vector ops
7732   if (VT.isVector())
7733     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7734       return FoldedVOp;
7735
7736   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7737
7738   // fold (srl c1, c2) -> c1 >>u c2
7739   // TODO - support non-uniform vector shift amounts.
7740   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7741   if (N0C && N1C && !N1C->isOpaque())
7742     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7743
7744   if (SDValue NewSel = foldBinOpIntoSelect(N))
7745     return NewSel;
7746
7747   // if (srl x, c) is known to be zero, return 0
7748   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7749                                    APInt::getAllOnesValue(OpSizeInBits)))
7750     return DAG.getConstant(0, SDLoc(N), VT);
7751
7752   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7753   if (N0.getOpcode() == ISD::SRL) {
7754     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7755                                           ConstantSDNode *RHS) {
7756       APInt c1 = LHS->getAPIntValue();
7757       APInt c2 = RHS->getAPIntValue();
7758       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7759       return (c1 + c2).uge(OpSizeInBits);
7760     };
7761     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7762       return DAG.getConstant(0, SDLoc(N), VT);
7763
7764     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7765                                        ConstantSDNode *RHS) {
7766       APInt c1 = LHS->getAPIntValue();
7767       APInt c2 = RHS->getAPIntValue();
7768       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7769       return (c1 + c2).ult(OpSizeInBits);
7770     };
7771     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7772       SDLoc DL(N);
7773       EVT ShiftVT = N1.getValueType();
7774       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7775       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7776     }
7777   }
7778
7779   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7780   // TODO - support non-uniform vector shift amounts.
7781   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7782       N0.getOperand(0).getOpcode() == ISD::SRL) {
7783     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7784       uint64_t c1 = N001C->getZExtValue();
7785       uint64_t c2 = N1C->getZExtValue();
7786       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7787       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7788       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7789       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7790       if (c1 + OpSizeInBits == InnerShiftSize) {
7791         SDLoc DL(N0);
7792         if (c1 + c2 >= InnerShiftSize)
7793           return DAG.getConstant(0, DL, VT);
7794         return DAG.getNode(ISD::TRUNCATE, DL, VT,
7795                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
7796                                        N0.getOperand(0).getOperand(0),
7797                                        DAG.getConstant(c1 + c2, DL,
7798                                                        ShiftCountVT)));
7799       }
7800     }
7801   }
7802
7803   // fold (srl (shl x, c), c) -> (and x, cst2)
7804   // TODO - (srl (shl x, c1), c2).
7805   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
7806       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
7807     SDLoc DL(N);
7808     SDValue Mask =
7809         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
7810     AddToWorklist(Mask.getNode());
7811     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
7812   }
7813
7814   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
7815   // TODO - support non-uniform vector shift amounts.
7816   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
7817     // Shifting in all undef bits?
7818     EVT SmallVT = N0.getOperand(0).getValueType();
7819     unsigned BitSize = SmallVT.getScalarSizeInBits();
7820     if (N1C->getAPIntValue().uge(BitSize))
7821       return DAG.getUNDEF(VT);
7822
7823     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
7824       uint64_t ShiftAmt = N1C->getZExtValue();
7825       SDLoc DL0(N0);
7826       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
7827                                        N0.getOperand(0),
7828                           DAG.getConstant(ShiftAmt, DL0,
7829                                           getShiftAmountTy(SmallVT)));
7830       AddToWorklist(SmallShift.getNode());
7831       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
7832       SDLoc DL(N);
7833       return DAG.getNode(ISD::AND, DL, VT,
7834                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
7835                          DAG.getConstant(Mask, DL, VT));
7836     }
7837   }
7838
7839   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
7840   // bit, which is unmodified by sra.
7841   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
7842     if (N0.getOpcode() == ISD::SRA)
7843       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
7844   }
7845
7846   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
7847   if (N1C && N0.getOpcode() == ISD::CTLZ &&
7848       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
7849     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
7850
7851     // If any of the input bits are KnownOne, then the input couldn't be all
7852     // zeros, thus the result of the srl will always be zero.
7853     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
7854
7855     // If all of the bits input the to ctlz node are known to be zero, then
7856     // the result of the ctlz is "32" and the result of the shift is one.
7857     APInt UnknownBits = ~Known.Zero;
7858     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
7859
7860     // Otherwise, check to see if there is exactly one bit input to the ctlz.
7861     if (UnknownBits.isPowerOf2()) {
7862       // Okay, we know that only that the single bit specified by UnknownBits
7863       // could be set on input to the CTLZ node. If this bit is set, the SRL
7864       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
7865       // to an SRL/XOR pair, which is likely to simplify more.
7866       unsigned ShAmt = UnknownBits.countTrailingZeros();
7867       SDValue Op = N0.getOperand(0);
7868
7869       if (ShAmt) {
7870         SDLoc DL(N0);
7871         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
7872                   DAG.getConstant(ShAmt, DL,
7873                                   getShiftAmountTy(Op.getValueType())));
7874         AddToWorklist(Op.getNode());
7875       }
7876
7877       SDLoc DL(N);
7878       return DAG.getNode(ISD::XOR, DL, VT,
7879                          Op, DAG.getConstant(1, DL, VT));
7880     }
7881   }
7882
7883   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
7884   if (N1.getOpcode() == ISD::TRUNCATE &&
7885       N1.getOperand(0).getOpcode() == ISD::AND) {
7886     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7887       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
7888   }
7889
7890   // fold operands of srl based on knowledge that the low bits are not
7891   // demanded.
7892   // TODO - support non-uniform vector shift amounts.
7893   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7894     return SDValue(N, 0);
7895
7896   if (N1C && !N1C->isOpaque())
7897     if (SDValue NewSRL = visitShiftByConstant(N))
7898       return NewSRL;
7899
7900   // Attempt to convert a srl of a load into a narrower zero-extending load.
7901   if (SDValue NarrowLoad = ReduceLoadWidth(N))
7902     return NarrowLoad;
7903
7904   // Here is a common situation. We want to optimize:
7905   //
7906   //   %a = ...
7907   //   %b = and i32 %a, 2
7908   //   %c = srl i32 %b, 1
7909   //   brcond i32 %c ...
7910   //
7911   // into
7912   //
7913   //   %a = ...
7914   //   %b = and %a, 2
7915   //   %c = setcc eq %b, 0
7916   //   brcond %c ...
7917   //
7918   // However when after the source operand of SRL is optimized into AND, the SRL
7919   // itself may not be optimized further. Look for it and add the BRCOND into
7920   // the worklist.
7921   if (N->hasOneUse()) {
7922     SDNode *Use = *N->use_begin();
7923     if (Use->getOpcode() == ISD::BRCOND)
7924       AddToWorklist(Use);
7925     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
7926       // Also look pass the truncate.
7927       Use = *Use->use_begin();
7928       if (Use->getOpcode() == ISD::BRCOND)
7929         AddToWorklist(Use);
7930     }
7931   }
7932
7933   return SDValue();
7934 }
7935
7936 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
7937   EVT VT = N->getValueType(0);
7938   SDValue N0 = N->getOperand(0);
7939   SDValue N1 = N->getOperand(1);
7940   SDValue N2 = N->getOperand(2);
7941   bool IsFSHL = N->getOpcode() == ISD::FSHL;
7942   unsigned BitWidth = VT.getScalarSizeInBits();
7943
7944   // fold (fshl N0, N1, 0) -> N0
7945   // fold (fshr N0, N1, 0) -> N1
7946   if (isPowerOf2_32(BitWidth))
7947     if (DAG.MaskedValueIsZero(
7948             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
7949       return IsFSHL ? N0 : N1;
7950
7951   auto IsUndefOrZero = [](SDValue V) {
7952     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
7953   };
7954
7955   // TODO - support non-uniform vector shift amounts.
7956   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
7957     EVT ShAmtTy = N2.getValueType();
7958
7959     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
7960     if (Cst->getAPIntValue().uge(BitWidth)) {
7961       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
7962       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
7963                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
7964     }
7965
7966     unsigned ShAmt = Cst->getZExtValue();
7967     if (ShAmt == 0)
7968       return IsFSHL ? N0 : N1;
7969
7970     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
7971     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
7972     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
7973     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
7974     if (IsUndefOrZero(N0))
7975       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
7976                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
7977                                          SDLoc(N), ShAmtTy));
7978     if (IsUndefOrZero(N1))
7979       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
7980                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
7981                                          SDLoc(N), ShAmtTy));
7982   }
7983
7984   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
7985   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
7986   // iff We know the shift amount is in range.
7987   // TODO: when is it worth doing SUB(BW, N2) as well?
7988   if (isPowerOf2_32(BitWidth)) {
7989     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
7990     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7991       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
7992     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
7993       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
7994   }
7995
7996   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
7997   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
7998   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
7999   // is legal as well we might be better off avoiding non-constant (BW - N2).
8000   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8001   if (N0 == N1 && hasOperation(RotOpc, VT))
8002     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8003
8004   // Simplify, based on bits shifted out of N0/N1.
8005   if (SimplifyDemandedBits(SDValue(N, 0)))
8006     return SDValue(N, 0);
8007
8008   return SDValue();
8009 }
8010
8011 SDValue DAGCombiner::visitABS(SDNode *N) {
8012   SDValue N0 = N->getOperand(0);
8013   EVT VT = N->getValueType(0);
8014
8015   // fold (abs c1) -> c2
8016   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8017     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8018   // fold (abs (abs x)) -> (abs x)
8019   if (N0.getOpcode() == ISD::ABS)
8020     return N0;
8021   // fold (abs x) -> x iff not-negative
8022   if (DAG.SignBitIsZero(N0))
8023     return N0;
8024   return SDValue();
8025 }
8026
8027 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8028   SDValue N0 = N->getOperand(0);
8029   EVT VT = N->getValueType(0);
8030
8031   // fold (bswap c1) -> c2
8032   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8033     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8034   // fold (bswap (bswap x)) -> x
8035   if (N0.getOpcode() == ISD::BSWAP)
8036     return N0->getOperand(0);
8037   return SDValue();
8038 }
8039
8040 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8041   SDValue N0 = N->getOperand(0);
8042   EVT VT = N->getValueType(0);
8043
8044   // fold (bitreverse c1) -> c2
8045   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8046     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8047   // fold (bitreverse (bitreverse x)) -> x
8048   if (N0.getOpcode() == ISD::BITREVERSE)
8049     return N0.getOperand(0);
8050   return SDValue();
8051 }
8052
8053 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8054   SDValue N0 = N->getOperand(0);
8055   EVT VT = N->getValueType(0);
8056
8057   // fold (ctlz c1) -> c2
8058   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8059     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8060
8061   // If the value is known never to be zero, switch to the undef version.
8062   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8063     if (DAG.isKnownNeverZero(N0))
8064       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8065   }
8066
8067   return SDValue();
8068 }
8069
8070 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8071   SDValue N0 = N->getOperand(0);
8072   EVT VT = N->getValueType(0);
8073
8074   // fold (ctlz_zero_undef c1) -> c2
8075   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8076     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8077   return SDValue();
8078 }
8079
8080 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8081   SDValue N0 = N->getOperand(0);
8082   EVT VT = N->getValueType(0);
8083
8084   // fold (cttz c1) -> c2
8085   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8086     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8087
8088   // If the value is known never to be zero, switch to the undef version.
8089   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8090     if (DAG.isKnownNeverZero(N0))
8091       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8092   }
8093
8094   return SDValue();
8095 }
8096
8097 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8098   SDValue N0 = N->getOperand(0);
8099   EVT VT = N->getValueType(0);
8100
8101   // fold (cttz_zero_undef c1) -> c2
8102   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8103     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8104   return SDValue();
8105 }
8106
8107 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8108   SDValue N0 = N->getOperand(0);
8109   EVT VT = N->getValueType(0);
8110
8111   // fold (ctpop c1) -> c2
8112   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8113     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8114   return SDValue();
8115 }
8116
8117 // FIXME: This should be checking for no signed zeros on individual operands, as
8118 // well as no nans.
8119 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8120                                          SDValue RHS,
8121                                          const TargetLowering &TLI) {
8122   const TargetOptions &Options = DAG.getTarget().Options;
8123   EVT VT = LHS.getValueType();
8124
8125   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8126          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8127          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8128 }
8129
8130 /// Generate Min/Max node
8131 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8132                                    SDValue RHS, SDValue True, SDValue False,
8133                                    ISD::CondCode CC, const TargetLowering &TLI,
8134                                    SelectionDAG &DAG) {
8135   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8136     return SDValue();
8137
8138   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8139   switch (CC) {
8140   case ISD::SETOLT:
8141   case ISD::SETOLE:
8142   case ISD::SETLT:
8143   case ISD::SETLE:
8144   case ISD::SETULT:
8145   case ISD::SETULE: {
8146     // Since it's known never nan to get here already, either fminnum or
8147     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8148     // expanded in terms of it.
8149     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8150     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8151       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8152
8153     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8154     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8155       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8156     return SDValue();
8157   }
8158   case ISD::SETOGT:
8159   case ISD::SETOGE:
8160   case ISD::SETGT:
8161   case ISD::SETGE:
8162   case ISD::SETUGT:
8163   case ISD::SETUGE: {
8164     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8165     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8166       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8167
8168     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8169     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8170       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8171     return SDValue();
8172   }
8173   default:
8174     return SDValue();
8175   }
8176 }
8177
8178 /// If a (v)select has a condition value that is a sign-bit test, try to smear
8179 /// the condition operand sign-bit across the value width and use it as a mask.
8180 static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
8181   SDValue Cond = N->getOperand(0);
8182   SDValue C1 = N->getOperand(1);
8183   SDValue C2 = N->getOperand(2);
8184   assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
8185          "Expected select-of-constants");
8186
8187   EVT VT = N->getValueType(0);
8188   if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
8189       VT != Cond.getOperand(0).getValueType())
8190     return SDValue();
8191
8192   // The inverted-condition + commuted-select variants of these patterns are
8193   // canonicalized to these forms in IR.
8194   SDValue X = Cond.getOperand(0);
8195   SDValue CondC = Cond.getOperand(1);
8196   ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
8197   if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
8198       isAllOnesOrAllOnesSplat(C2)) {
8199     // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
8200     SDLoc DL(N);
8201     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8202     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8203     return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
8204   }
8205   if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
8206     // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
8207     SDLoc DL(N);
8208     SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
8209     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
8210     return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
8211   }
8212   return SDValue();
8213 }
8214
8215 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8216   SDValue Cond = N->getOperand(0);
8217   SDValue N1 = N->getOperand(1);
8218   SDValue N2 = N->getOperand(2);
8219   EVT VT = N->getValueType(0);
8220   EVT CondVT = Cond.getValueType();
8221   SDLoc DL(N);
8222
8223   if (!VT.isInteger())
8224     return SDValue();
8225
8226   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8227   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8228   if (!C1 || !C2)
8229     return SDValue();
8230
8231   // Only do this before legalization to avoid conflicting with target-specific
8232   // transforms in the other direction (create a select from a zext/sext). There
8233   // is also a target-independent combine here in DAGCombiner in the other
8234   // direction for (select Cond, -1, 0) when the condition is not i1.
8235   if (CondVT == MVT::i1 && !LegalOperations) {
8236     if (C1->isNullValue() && C2->isOne()) {
8237       // select Cond, 0, 1 --> zext (!Cond)
8238       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8239       if (VT != MVT::i1)
8240         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8241       return NotCond;
8242     }
8243     if (C1->isNullValue() && C2->isAllOnesValue()) {
8244       // select Cond, 0, -1 --> sext (!Cond)
8245       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8246       if (VT != MVT::i1)
8247         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8248       return NotCond;
8249     }
8250     if (C1->isOne() && C2->isNullValue()) {
8251       // select Cond, 1, 0 --> zext (Cond)
8252       if (VT != MVT::i1)
8253         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8254       return Cond;
8255     }
8256     if (C1->isAllOnesValue() && C2->isNullValue()) {
8257       // select Cond, -1, 0 --> sext (Cond)
8258       if (VT != MVT::i1)
8259         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8260       return Cond;
8261     }
8262
8263     // Use a target hook because some targets may prefer to transform in the
8264     // other direction.
8265     if (TLI.convertSelectOfConstantsToMath(VT)) {
8266       // For any constants that differ by 1, we can transform the select into an
8267       // extend and add.
8268       const APInt &C1Val = C1->getAPIntValue();
8269       const APInt &C2Val = C2->getAPIntValue();
8270       if (C1Val - 1 == C2Val) {
8271         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8272         if (VT != MVT::i1)
8273           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8274         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8275       }
8276       if (C1Val + 1 == C2Val) {
8277         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8278         if (VT != MVT::i1)
8279           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8280         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8281       }
8282
8283       // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
8284       if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
8285         if (VT != MVT::i1)
8286           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8287         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
8288         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
8289       }
8290
8291       if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8292         return V;
8293     }
8294
8295     return SDValue();
8296   }
8297
8298   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8299   // We can't do this reliably if integer based booleans have different contents
8300   // to floating point based booleans. This is because we can't tell whether we
8301   // have an integer-based boolean or a floating-point-based boolean unless we
8302   // can find the SETCC that produced it and inspect its operands. This is
8303   // fairly easy if C is the SETCC node, but it can potentially be
8304   // undiscoverable (or not reasonably discoverable). For example, it could be
8305   // in another basic block or it could require searching a complicated
8306   // expression.
8307   if (CondVT.isInteger() &&
8308       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8309           TargetLowering::ZeroOrOneBooleanContent &&
8310       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8311           TargetLowering::ZeroOrOneBooleanContent &&
8312       C1->isNullValue() && C2->isOne()) {
8313     SDValue NotCond =
8314         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8315     if (VT.bitsEq(CondVT))
8316       return NotCond;
8317     return DAG.getZExtOrTrunc(NotCond, DL, VT);
8318   }
8319
8320   return SDValue();
8321 }
8322
8323 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8324   SDValue N0 = N->getOperand(0);
8325   SDValue N1 = N->getOperand(1);
8326   SDValue N2 = N->getOperand(2);
8327   EVT VT = N->getValueType(0);
8328   EVT VT0 = N0.getValueType();
8329   SDLoc DL(N);
8330   SDNodeFlags Flags = N->getFlags();
8331
8332   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8333     return V;
8334
8335   // fold (select X, X, Y) -> (or X, Y)
8336   // fold (select X, 1, Y) -> (or C, Y)
8337   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8338     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8339
8340   if (SDValue V = foldSelectOfConstants(N))
8341     return V;
8342
8343   // fold (select C, 0, X) -> (and (not C), X)
8344   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8345     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8346     AddToWorklist(NOTNode.getNode());
8347     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8348   }
8349   // fold (select C, X, 1) -> (or (not C), X)
8350   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8351     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8352     AddToWorklist(NOTNode.getNode());
8353     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8354   }
8355   // fold (select X, Y, X) -> (and X, Y)
8356   // fold (select X, Y, 0) -> (and X, Y)
8357   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8358     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8359
8360   // If we can fold this based on the true/false value, do so.
8361   if (SimplifySelectOps(N, N1, N2))
8362     return SDValue(N, 0); // Don't revisit N.
8363
8364   if (VT0 == MVT::i1) {
8365     // The code in this block deals with the following 2 equivalences:
8366     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8367     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8368     // The target can specify its preferred form with the
8369     // shouldNormalizeToSelectSequence() callback. However we always transform
8370     // to the right anyway if we find the inner select exists in the DAG anyway
8371     // and we always transform to the left side if we know that we can further
8372     // optimize the combination of the conditions.
8373     bool normalizeToSequence =
8374         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8375     // select (and Cond0, Cond1), X, Y
8376     //   -> select Cond0, (select Cond1, X, Y), Y
8377     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8378       SDValue Cond0 = N0->getOperand(0);
8379       SDValue Cond1 = N0->getOperand(1);
8380       SDValue InnerSelect =
8381           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8382       if (normalizeToSequence || !InnerSelect.use_empty())
8383         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8384                            InnerSelect, N2, Flags);
8385       // Cleanup on failure.
8386       if (InnerSelect.use_empty())
8387         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8388     }
8389     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8390     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8391       SDValue Cond0 = N0->getOperand(0);
8392       SDValue Cond1 = N0->getOperand(1);
8393       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8394                                         Cond1, N1, N2, Flags);
8395       if (normalizeToSequence || !InnerSelect.use_empty())
8396         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8397                            InnerSelect, Flags);
8398       // Cleanup on failure.
8399       if (InnerSelect.use_empty())
8400         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8401     }
8402
8403     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8404     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8405       SDValue N1_0 = N1->getOperand(0);
8406       SDValue N1_1 = N1->getOperand(1);
8407       SDValue N1_2 = N1->getOperand(2);
8408       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8409         // Create the actual and node if we can generate good code for it.
8410         if (!normalizeToSequence) {
8411           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8412           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8413                              N2, Flags);
8414         }
8415         // Otherwise see if we can optimize the "and" to a better pattern.
8416         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8417           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8418                              N2, Flags);
8419         }
8420       }
8421     }
8422     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8423     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8424       SDValue N2_0 = N2->getOperand(0);
8425       SDValue N2_1 = N2->getOperand(1);
8426       SDValue N2_2 = N2->getOperand(2);
8427       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8428         // Create the actual or node if we can generate good code for it.
8429         if (!normalizeToSequence) {
8430           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8431           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8432                              N2_2, Flags);
8433         }
8434         // Otherwise see if we can optimize to a better pattern.
8435         if (SDValue Combined = visitORLike(N0, N2_0, N))
8436           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8437                              N2_2, Flags);
8438       }
8439     }
8440   }
8441
8442   // select (not Cond), N1, N2 -> select Cond, N2, N1
8443   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8444     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8445     SelectOp->setFlags(Flags);
8446     return SelectOp;
8447   }
8448
8449   // Fold selects based on a setcc into other things, such as min/max/abs.
8450   if (N0.getOpcode() == ISD::SETCC) {
8451     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8452     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8453
8454     // select (fcmp lt x, y), x, y -> fminnum x, y
8455     // select (fcmp gt x, y), x, y -> fmaxnum x, y
8456     //
8457     // This is OK if we don't care what happens if either operand is a NaN.
8458     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8459       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8460                                                 CC, TLI, DAG))
8461         return FMinMax;
8462
8463     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8464     // This is conservatively limited to pre-legal-operations to give targets
8465     // a chance to reverse the transform if they want to do that. Also, it is
8466     // unlikely that the pattern would be formed late, so it's probably not
8467     // worth going through the other checks.
8468     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8469         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8470         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8471       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8472       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8473       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8474         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8475         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8476         //
8477         // The IR equivalent of this transform would have this form:
8478         //   %a = add %x, C
8479         //   %c = icmp ugt %x, ~C
8480         //   %r = select %c, -1, %a
8481         //   =>
8482         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8483         //   %u0 = extractvalue %u, 0
8484         //   %u1 = extractvalue %u, 1
8485         //   %r = select %u1, -1, %u0
8486         SDVTList VTs = DAG.getVTList(VT, VT0);
8487         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8488         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8489       }
8490     }
8491
8492     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8493         (!LegalOperations &&
8494          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
8495       // Any flags available in a select/setcc fold will be on the setcc as they
8496       // migrated from fcmp
8497       Flags = N0.getNode()->getFlags();
8498       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8499                                        N2, N0.getOperand(2));
8500       SelectNode->setFlags(Flags);
8501       return SelectNode;
8502     }
8503
8504     return SimplifySelect(DL, N0, N1, N2);
8505   }
8506
8507   return SDValue();
8508 }
8509
8510 // This function assumes all the vselect's arguments are CONCAT_VECTOR
8511 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8512 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8513   SDLoc DL(N);
8514   SDValue Cond = N->getOperand(0);
8515   SDValue LHS = N->getOperand(1);
8516   SDValue RHS = N->getOperand(2);
8517   EVT VT = N->getValueType(0);
8518   int NumElems = VT.getVectorNumElements();
8519   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8520          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8521          Cond.getOpcode() == ISD::BUILD_VECTOR);
8522
8523   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8524   // binary ones here.
8525   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8526     return SDValue();
8527
8528   // We're sure we have an even number of elements due to the
8529   // concat_vectors we have as arguments to vselect.
8530   // Skip BV elements until we find one that's not an UNDEF
8531   // After we find an UNDEF element, keep looping until we get to half the
8532   // length of the BV and see if all the non-undef nodes are the same.
8533   ConstantSDNode *BottomHalf = nullptr;
8534   for (int i = 0; i < NumElems / 2; ++i) {
8535     if (Cond->getOperand(i)->isUndef())
8536       continue;
8537
8538     if (BottomHalf == nullptr)
8539       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8540     else if (Cond->getOperand(i).getNode() != BottomHalf)
8541       return SDValue();
8542   }
8543
8544   // Do the same for the second half of the BuildVector
8545   ConstantSDNode *TopHalf = nullptr;
8546   for (int i = NumElems / 2; i < NumElems; ++i) {
8547     if (Cond->getOperand(i)->isUndef())
8548       continue;
8549
8550     if (TopHalf == nullptr)
8551       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8552     else if (Cond->getOperand(i).getNode() != TopHalf)
8553       return SDValue();
8554   }
8555
8556   assert(TopHalf && BottomHalf &&
8557          "One half of the selector was all UNDEFs and the other was all the "
8558          "same value. This should have been addressed before this function.");
8559   return DAG.getNode(
8560       ISD::CONCAT_VECTORS, DL, VT,
8561       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8562       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8563 }
8564
8565 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8566   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8567   SDValue Mask = MSC->getMask();
8568   SDValue Chain = MSC->getChain();
8569   SDLoc DL(N);
8570
8571   // Zap scatters with a zero mask.
8572   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8573     return Chain;
8574
8575   return SDValue();
8576 }
8577
8578 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8579   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8580   SDValue Mask = MST->getMask();
8581   SDValue Chain = MST->getChain();
8582   SDLoc DL(N);
8583
8584   // Zap masked stores with a zero mask.
8585   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8586     return Chain;
8587
8588   return SDValue();
8589 }
8590
8591 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8592   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8593   SDValue Mask = MGT->getMask();
8594   SDLoc DL(N);
8595
8596   // Zap gathers with a zero mask.
8597   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8598     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8599
8600   return SDValue();
8601 }
8602
8603 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8604   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8605   SDValue Mask = MLD->getMask();
8606   SDLoc DL(N);
8607
8608   // Zap masked loads with a zero mask.
8609   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8610     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8611
8612   return SDValue();
8613 }
8614
8615 /// A vector select of 2 constant vectors can be simplified to math/logic to
8616 /// avoid a variable select instruction and possibly avoid constant loads.
8617 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8618   SDValue Cond = N->getOperand(0);
8619   SDValue N1 = N->getOperand(1);
8620   SDValue N2 = N->getOperand(2);
8621   EVT VT = N->getValueType(0);
8622   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8623       !TLI.convertSelectOfConstantsToMath(VT) ||
8624       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8625       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8626     return SDValue();
8627
8628   // Check if we can use the condition value to increment/decrement a single
8629   // constant value. This simplifies a select to an add and removes a constant
8630   // load/materialization from the general case.
8631   bool AllAddOne = true;
8632   bool AllSubOne = true;
8633   unsigned Elts = VT.getVectorNumElements();
8634   for (unsigned i = 0; i != Elts; ++i) {
8635     SDValue N1Elt = N1.getOperand(i);
8636     SDValue N2Elt = N2.getOperand(i);
8637     if (N1Elt.isUndef() || N2Elt.isUndef())
8638       continue;
8639
8640     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8641     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8642     if (C1 != C2 + 1)
8643       AllAddOne = false;
8644     if (C1 != C2 - 1)
8645       AllSubOne = false;
8646   }
8647
8648   // Further simplifications for the extra-special cases where the constants are
8649   // all 0 or all -1 should be implemented as folds of these patterns.
8650   SDLoc DL(N);
8651   if (AllAddOne || AllSubOne) {
8652     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8653     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8654     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8655     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8656     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8657   }
8658
8659   // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
8660   APInt Pow2C;
8661   if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
8662       isNullOrNullSplat(N2)) {
8663     SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
8664     SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
8665     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
8666   }
8667
8668   if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
8669     return V;
8670
8671   // The general case for select-of-constants:
8672   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8673   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8674   // leave that to a machine-specific pass.
8675   return SDValue();
8676 }
8677
8678 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8679   SDValue N0 = N->getOperand(0);
8680   SDValue N1 = N->getOperand(1);
8681   SDValue N2 = N->getOperand(2);
8682   EVT VT = N->getValueType(0);
8683   SDLoc DL(N);
8684
8685   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8686     return V;
8687
8688   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8689   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8690     return DAG.getSelect(DL, VT, F, N2, N1);
8691
8692   // Canonicalize integer abs.
8693   // vselect (setg[te] X,  0),  X, -X ->
8694   // vselect (setgt    X, -1),  X, -X ->
8695   // vselect (setl[te] X,  0), -X,  X ->
8696   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8697   if (N0.getOpcode() == ISD::SETCC) {
8698     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8699     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8700     bool isAbs = false;
8701     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8702
8703     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8704          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8705         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8706       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8707     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8708              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8709       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8710
8711     if (isAbs) {
8712       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8713         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8714
8715       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
8716                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
8717                                                   DL, getShiftAmountTy(VT)));
8718       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8719       AddToWorklist(Shift.getNode());
8720       AddToWorklist(Add.getNode());
8721       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8722     }
8723
8724     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8725     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8726     //
8727     // This is OK if we don't care about what happens if either operand is a
8728     // NaN.
8729     //
8730     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
8731       if (SDValue FMinMax =
8732               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
8733         return FMinMax;
8734     }
8735
8736     // If this select has a condition (setcc) with narrower operands than the
8737     // select, try to widen the compare to match the select width.
8738     // TODO: This should be extended to handle any constant.
8739     // TODO: This could be extended to handle non-loading patterns, but that
8740     //       requires thorough testing to avoid regressions.
8741     if (isNullOrNullSplat(RHS)) {
8742       EVT NarrowVT = LHS.getValueType();
8743       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8744       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8745       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8746       unsigned WideWidth = WideVT.getScalarSizeInBits();
8747       bool IsSigned = isSignedIntSetCC(CC);
8748       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8749       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8750           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8751           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8752           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8753         // Both compare operands can be widened for free. The LHS can use an
8754         // extended load, and the RHS is a constant:
8755         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8756         //   vselect (setcc extload(X), C'), N1, N2
8757         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8758         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8759         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8760         EVT WideSetCCVT = getSetCCResultType(WideVT);
8761         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8762         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8763       }
8764     }
8765   }
8766
8767   if (SimplifySelectOps(N, N1, N2))
8768     return SDValue(N, 0);  // Don't revisit N.
8769
8770   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8771   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8772     return N1;
8773   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8774   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8775     return N2;
8776
8777   // The ConvertSelectToConcatVector function is assuming both the above
8778   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8779   // and addressed.
8780   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8781       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8782       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8783     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8784       return CV;
8785   }
8786
8787   if (SDValue V = foldVSelectOfConstants(N))
8788     return V;
8789
8790   return SDValue();
8791 }
8792
8793 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8794   SDValue N0 = N->getOperand(0);
8795   SDValue N1 = N->getOperand(1);
8796   SDValue N2 = N->getOperand(2);
8797   SDValue N3 = N->getOperand(3);
8798   SDValue N4 = N->getOperand(4);
8799   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8800
8801   // fold select_cc lhs, rhs, x, x, cc -> x
8802   if (N2 == N3)
8803     return N2;
8804
8805   // Determine if the condition we're dealing with is constant
8806   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8807                                   CC, SDLoc(N), false)) {
8808     AddToWorklist(SCC.getNode());
8809
8810     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8811       if (!SCCC->isNullValue())
8812         return N2;    // cond always true -> true val
8813       else
8814         return N3;    // cond always false -> false val
8815     } else if (SCC->isUndef()) {
8816       // When the condition is UNDEF, just return the first operand. This is
8817       // coherent the DAG creation, no setcc node is created in this case
8818       return N2;
8819     } else if (SCC.getOpcode() == ISD::SETCC) {
8820       // Fold to a simpler select_cc
8821       SDValue SelectOp = DAG.getNode(
8822           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8823           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8824       SelectOp->setFlags(SCC->getFlags());
8825       return SelectOp;
8826     }
8827   }
8828
8829   // If we can fold this based on the true/false value, do so.
8830   if (SimplifySelectOps(N, N2, N3))
8831     return SDValue(N, 0);  // Don't revisit N.
8832
8833   // fold select_cc into other things, such as min/max/abs
8834   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8835 }
8836
8837 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8838   // setcc is very commonly used as an argument to brcond. This pattern
8839   // also lend itself to numerous combines and, as a result, it is desired
8840   // we keep the argument to a brcond as a setcc as much as possible.
8841   bool PreferSetCC =
8842       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8843
8844   SDValue Combined = SimplifySetCC(
8845       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8846       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8847
8848   if (!Combined)
8849     return SDValue();
8850
8851   // If we prefer to have a setcc, and we don't, we'll try our best to
8852   // recreate one using rebuildSetCC.
8853   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8854     SDValue NewSetCC = rebuildSetCC(Combined);
8855
8856     // We don't have anything interesting to combine to.
8857     if (NewSetCC.getNode() == N)
8858       return SDValue();
8859
8860     if (NewSetCC)
8861       return NewSetCC;
8862   }
8863
8864   return Combined;
8865 }
8866
8867 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
8868   SDValue LHS = N->getOperand(0);
8869   SDValue RHS = N->getOperand(1);
8870   SDValue Carry = N->getOperand(2);
8871   SDValue Cond = N->getOperand(3);
8872
8873   // If Carry is false, fold to a regular SETCC.
8874   if (isNullConstant(Carry))
8875     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
8876
8877   return SDValue();
8878 }
8879
8880 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
8881 /// a build_vector of constants.
8882 /// This function is called by the DAGCombiner when visiting sext/zext/aext
8883 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
8884 /// Vector extends are not folded if operations are legal; this is to
8885 /// avoid introducing illegal build_vector dag nodes.
8886 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
8887                                          SelectionDAG &DAG, bool LegalTypes) {
8888   unsigned Opcode = N->getOpcode();
8889   SDValue N0 = N->getOperand(0);
8890   EVT VT = N->getValueType(0);
8891   SDLoc DL(N);
8892
8893   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
8894          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
8895          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
8896          && "Expected EXTEND dag node in input!");
8897
8898   // fold (sext c1) -> c1
8899   // fold (zext c1) -> c1
8900   // fold (aext c1) -> c1
8901   if (isa<ConstantSDNode>(N0))
8902     return DAG.getNode(Opcode, DL, VT, N0);
8903
8904   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
8905   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
8906   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
8907   if (N0->getOpcode() == ISD::SELECT) {
8908     SDValue Op1 = N0->getOperand(1);
8909     SDValue Op2 = N0->getOperand(2);
8910     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
8911         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
8912       // For any_extend, choose sign extension of the constants to allow a
8913       // possible further transform to sign_extend_inreg.i.e.
8914       //
8915       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
8916       // t2: i64 = any_extend t1
8917       // -->
8918       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
8919       // -->
8920       // t4: i64 = sign_extend_inreg t3
8921       unsigned FoldOpc = Opcode;
8922       if (FoldOpc == ISD::ANY_EXTEND)
8923         FoldOpc = ISD::SIGN_EXTEND;
8924       return DAG.getSelect(DL, VT, N0->getOperand(0),
8925                            DAG.getNode(FoldOpc, DL, VT, Op1),
8926                            DAG.getNode(FoldOpc, DL, VT, Op2));
8927     }
8928   }
8929
8930   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
8931   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
8932   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
8933   EVT SVT = VT.getScalarType();
8934   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
8935       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
8936     return SDValue();
8937
8938   // We can fold this node into a build_vector.
8939   unsigned VTBits = SVT.getSizeInBits();
8940   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
8941   SmallVector<SDValue, 8> Elts;
8942   unsigned NumElts = VT.getVectorNumElements();
8943
8944   // For zero-extensions, UNDEF elements still guarantee to have the upper
8945   // bits set to zero.
8946   bool IsZext =
8947       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
8948
8949   for (unsigned i = 0; i != NumElts; ++i) {
8950     SDValue Op = N0.getOperand(i);
8951     if (Op.isUndef()) {
8952       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
8953       continue;
8954     }
8955
8956     SDLoc DL(Op);
8957     // Get the constant value and if needed trunc it to the size of the type.
8958     // Nodes like build_vector might have constants wider than the scalar type.
8959     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
8960     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
8961       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
8962     else
8963       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
8964   }
8965
8966   return DAG.getBuildVector(VT, DL, Elts);
8967 }
8968
8969 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
8970 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
8971 // transformation. Returns true if extension are possible and the above
8972 // mentioned transformation is profitable.
8973 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
8974                                     unsigned ExtOpc,
8975                                     SmallVectorImpl<SDNode *> &ExtendNodes,
8976                                     const TargetLowering &TLI) {
8977   bool HasCopyToRegUses = false;
8978   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
8979   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
8980                             UE = N0.getNode()->use_end();
8981        UI != UE; ++UI) {
8982     SDNode *User = *UI;
8983     if (User == N)
8984       continue;
8985     if (UI.getUse().getResNo() != N0.getResNo())
8986       continue;
8987     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
8988     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
8989       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
8990       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
8991         // Sign bits will be lost after a zext.
8992         return false;
8993       bool Add = false;
8994       for (unsigned i = 0; i != 2; ++i) {
8995         SDValue UseOp = User->getOperand(i);
8996         if (UseOp == N0)
8997           continue;
8998         if (!isa<ConstantSDNode>(UseOp))
8999           return false;
9000         Add = true;
9001       }
9002       if (Add)
9003         ExtendNodes.push_back(User);
9004       continue;
9005     }
9006     // If truncates aren't free and there are users we can't
9007     // extend, it isn't worthwhile.
9008     if (!isTruncFree)
9009       return false;
9010     // Remember if this value is live-out.
9011     if (User->getOpcode() == ISD::CopyToReg)
9012       HasCopyToRegUses = true;
9013   }
9014
9015   if (HasCopyToRegUses) {
9016     bool BothLiveOut = false;
9017     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9018          UI != UE; ++UI) {
9019       SDUse &Use = UI.getUse();
9020       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9021         BothLiveOut = true;
9022         break;
9023       }
9024     }
9025     if (BothLiveOut)
9026       // Both unextended and extended values are live out. There had better be
9027       // a good reason for the transformation.
9028       return ExtendNodes.size();
9029   }
9030   return true;
9031 }
9032
9033 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9034                                   SDValue OrigLoad, SDValue ExtLoad,
9035                                   ISD::NodeType ExtType) {
9036   // Extend SetCC uses if necessary.
9037   SDLoc DL(ExtLoad);
9038   for (SDNode *SetCC : SetCCs) {
9039     SmallVector<SDValue, 4> Ops;
9040
9041     for (unsigned j = 0; j != 2; ++j) {
9042       SDValue SOp = SetCC->getOperand(j);
9043       if (SOp == OrigLoad)
9044         Ops.push_back(ExtLoad);
9045       else
9046         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9047     }
9048
9049     Ops.push_back(SetCC->getOperand(2));
9050     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9051   }
9052 }
9053
9054 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
9055 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
9056   SDValue N0 = N->getOperand(0);
9057   EVT DstVT = N->getValueType(0);
9058   EVT SrcVT = N0.getValueType();
9059
9060   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9061           N->getOpcode() == ISD::ZERO_EXTEND) &&
9062          "Unexpected node type (not an extend)!");
9063
9064   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
9065   // For example, on a target with legal v4i32, but illegal v8i32, turn:
9066   //   (v8i32 (sext (v8i16 (load x))))
9067   // into:
9068   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
9069   //                          (v4i32 (sextload (x + 16)))))
9070   // Where uses of the original load, i.e.:
9071   //   (v8i16 (load x))
9072   // are replaced with:
9073   //   (v8i16 (truncate
9074   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
9075   //                            (v4i32 (sextload (x + 16)))))))
9076   //
9077   // This combine is only applicable to illegal, but splittable, vectors.
9078   // All legal types, and illegal non-vector types, are handled elsewhere.
9079   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
9080   //
9081   if (N0->getOpcode() != ISD::LOAD)
9082     return SDValue();
9083
9084   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9085
9086   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
9087       !N0.hasOneUse() || !LN0->isSimple() ||
9088       !DstVT.isVector() || !DstVT.isPow2VectorType() ||
9089       !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9090     return SDValue();
9091
9092   SmallVector<SDNode *, 4> SetCCs;
9093   if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
9094     return SDValue();
9095
9096   ISD::LoadExtType ExtType =
9097       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
9098
9099   // Try to split the vector types to get down to legal types.
9100   EVT SplitSrcVT = SrcVT;
9101   EVT SplitDstVT = DstVT;
9102   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
9103          SplitSrcVT.getVectorNumElements() > 1) {
9104     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
9105     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
9106   }
9107
9108   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
9109     return SDValue();
9110
9111   SDLoc DL(N);
9112   const unsigned NumSplits =
9113       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
9114   const unsigned Stride = SplitSrcVT.getStoreSize();
9115   SmallVector<SDValue, 4> Loads;
9116   SmallVector<SDValue, 4> Chains;
9117
9118   SDValue BasePtr = LN0->getBasePtr();
9119   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
9120     const unsigned Offset = Idx * Stride;
9121     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
9122
9123     SDValue SplitLoad = DAG.getExtLoad(
9124         ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
9125         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
9126         LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
9127
9128     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
9129                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
9130
9131     Loads.push_back(SplitLoad.getValue(0));
9132     Chains.push_back(SplitLoad.getValue(1));
9133   }
9134
9135   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
9136   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
9137
9138   // Simplify TF.
9139   AddToWorklist(NewChain.getNode());
9140
9141   CombineTo(N, NewValue);
9142
9143   // Replace uses of the original load (before extension)
9144   // with a truncate of the concatenated sextloaded vectors.
9145   SDValue Trunc =
9146       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
9147   ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
9148   CombineTo(N0.getNode(), Trunc, NewChain);
9149   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9150 }
9151
9152 // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9153 //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9154 SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
9155   assert(N->getOpcode() == ISD::ZERO_EXTEND);
9156   EVT VT = N->getValueType(0);
9157   EVT OrigVT = N->getOperand(0).getValueType();
9158   if (TLI.isZExtFree(OrigVT, VT))
9159     return SDValue();
9160
9161   // and/or/xor
9162   SDValue N0 = N->getOperand(0);
9163   if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9164         N0.getOpcode() == ISD::XOR) ||
9165       N0.getOperand(1).getOpcode() != ISD::Constant ||
9166       (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
9167     return SDValue();
9168
9169   // shl/shr
9170   SDValue N1 = N0->getOperand(0);
9171   if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
9172       N1.getOperand(1).getOpcode() != ISD::Constant ||
9173       (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
9174     return SDValue();
9175
9176   // load
9177   if (!isa<LoadSDNode>(N1.getOperand(0)))
9178     return SDValue();
9179   LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
9180   EVT MemVT = Load->getMemoryVT();
9181   if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
9182       Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
9183     return SDValue();
9184
9185
9186   // If the shift op is SHL, the logic op must be AND, otherwise the result
9187   // will be wrong.
9188   if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
9189     return SDValue();
9190
9191   if (!N0.hasOneUse() || !N1.hasOneUse())
9192     return SDValue();
9193
9194   SmallVector<SDNode*, 4> SetCCs;
9195   if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
9196                                ISD::ZERO_EXTEND, SetCCs, TLI))
9197     return SDValue();
9198
9199   // Actually do the transformation.
9200   SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
9201                                    Load->getChain(), Load->getBasePtr(),
9202                                    Load->getMemoryVT(), Load->getMemOperand());
9203
9204   SDLoc DL1(N1);
9205   SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
9206                               N1.getOperand(1));
9207
9208   APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9209   Mask = Mask.zext(VT.getSizeInBits());
9210   SDLoc DL0(N0);
9211   SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
9212                             DAG.getConstant(Mask, DL0, VT));
9213
9214   ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9215   CombineTo(N, And);
9216   if (SDValue(Load, 0).hasOneUse()) {
9217     DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
9218   } else {
9219     SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
9220                                 Load->getValueType(0), ExtLoad);
9221     CombineTo(Load, Trunc, ExtLoad.getValue(1));
9222   }
9223
9224   // N0 is dead at this point.
9225   recursivelyDeleteUnusedNodes(N0.getNode());
9226
9227   return SDValue(N,0); // Return N so it doesn't get rechecked!
9228 }
9229
9230 /// If we're narrowing or widening the result of a vector select and the final
9231 /// size is the same size as a setcc (compare) feeding the select, then try to
9232 /// apply the cast operation to the select's operands because matching vector
9233 /// sizes for a select condition and other operands should be more efficient.
9234 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9235   unsigned CastOpcode = Cast->getOpcode();
9236   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9237           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9238           CastOpcode == ISD::FP_ROUND) &&
9239          "Unexpected opcode for vector select narrowing/widening");
9240
9241   // We only do this transform before legal ops because the pattern may be
9242   // obfuscated by target-specific operations after legalization. Do not create
9243   // an illegal select op, however, because that may be difficult to lower.
9244   EVT VT = Cast->getValueType(0);
9245   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9246     return SDValue();
9247
9248   SDValue VSel = Cast->getOperand(0);
9249   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9250       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9251     return SDValue();
9252
9253   // Does the setcc have the same vector size as the casted select?
9254   SDValue SetCC = VSel.getOperand(0);
9255   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9256   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9257     return SDValue();
9258
9259   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9260   SDValue A = VSel.getOperand(1);
9261   SDValue B = VSel.getOperand(2);
9262   SDValue CastA, CastB;
9263   SDLoc DL(Cast);
9264   if (CastOpcode == ISD::FP_ROUND) {
9265     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9266     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9267     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9268   } else {
9269     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9270     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9271   }
9272   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9273 }
9274
9275 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9276 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9277 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9278                                      const TargetLowering &TLI, EVT VT,
9279                                      bool LegalOperations, SDNode *N,
9280                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
9281   SDNode *N0Node = N0.getNode();
9282   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9283                                                    : ISD::isZEXTLoad(N0Node);
9284   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9285       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9286     return SDValue();
9287
9288   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9289   EVT MemVT = LN0->getMemoryVT();
9290   if ((LegalOperations || !LN0->isSimple() ||
9291        VT.isVector()) &&
9292       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9293     return SDValue();
9294
9295   SDValue ExtLoad =
9296       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9297                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9298   Combiner.CombineTo(N, ExtLoad);
9299   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9300   if (LN0->use_empty())
9301     Combiner.recursivelyDeleteUnusedNodes(LN0);
9302   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9303 }
9304
9305 // fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9306 // Only generate vector extloads when 1) they're legal, and 2) they are
9307 // deemed desirable by the target.
9308 static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
9309                                   const TargetLowering &TLI, EVT VT,
9310                                   bool LegalOperations, SDNode *N, SDValue N0,
9311                                   ISD::LoadExtType ExtLoadType,
9312                                   ISD::NodeType ExtOpc) {
9313   if (!ISD::isNON_EXTLoad(N0.getNode()) ||
9314       !ISD::isUNINDEXEDLoad(N0.getNode()) ||
9315       ((LegalOperations || VT.isVector() ||
9316         !cast<LoadSDNode>(N0)->isSimple()) &&
9317        !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
9318     return {};
9319
9320   bool DoXform = true;
9321   SmallVector<SDNode *, 4> SetCCs;
9322   if (!N0.hasOneUse())
9323     DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
9324   if (VT.isVector())
9325     DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
9326   if (!DoXform)
9327     return {};
9328
9329   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9330   SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9331                                    LN0->getBasePtr(), N0.getValueType(),
9332                                    LN0->getMemOperand());
9333   Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
9334   // If the load value is used only by N, replace it via CombineTo N.
9335   bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
9336   Combiner.CombineTo(N, ExtLoad);
9337   if (NoReplaceTrunc) {
9338     DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9339     Combiner.recursivelyDeleteUnusedNodes(LN0);
9340   } else {
9341     SDValue Trunc =
9342         DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
9343     Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
9344   }
9345   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9346 }
9347
9348 static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
9349                                         const TargetLowering &TLI, EVT VT,
9350                                         SDNode *N, SDValue N0,
9351                                         ISD::LoadExtType ExtLoadType,
9352                                         ISD::NodeType ExtOpc) {
9353   if (!N0.hasOneUse())
9354     return SDValue();
9355
9356   MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
9357   if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
9358     return SDValue();
9359
9360   if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
9361     return SDValue();
9362
9363   if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
9364     return SDValue();
9365
9366   SDLoc dl(Ld);
9367   SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
9368   SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
9369                                       Ld->getBasePtr(), Ld->getMask(),
9370                                       PassThru, Ld->getMemoryVT(),
9371                                       Ld->getMemOperand(), ExtLoadType,
9372                                       Ld->isExpandingLoad());
9373   DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
9374   return NewLoad;
9375 }
9376
9377 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9378                                        bool LegalOperations) {
9379   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9380           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9381
9382   SDValue SetCC = N->getOperand(0);
9383   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9384       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9385     return SDValue();
9386
9387   SDValue X = SetCC.getOperand(0);
9388   SDValue Ones = SetCC.getOperand(1);
9389   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9390   EVT VT = N->getValueType(0);
9391   EVT XVT = X.getValueType();
9392   // setge X, C is canonicalized to setgt, so we do not need to match that
9393   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9394   // not require the 'not' op.
9395   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9396     // Invert and smear/shift the sign bit:
9397     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9398     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9399     SDLoc DL(N);
9400     SDValue NotX = DAG.getNOT(DL, X, VT);
9401     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9402     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9403     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9404   }
9405   return SDValue();
9406 }
9407
9408 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9409   SDValue N0 = N->getOperand(0);
9410   EVT VT = N->getValueType(0);
9411   SDLoc DL(N);
9412
9413   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9414     return Res;
9415
9416   // fold (sext (sext x)) -> (sext x)
9417   // fold (sext (aext x)) -> (sext x)
9418   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9419     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9420
9421   if (N0.getOpcode() == ISD::TRUNCATE) {
9422     // fold (sext (truncate (load x))) -> (sext (smaller load x))
9423     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9424     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9425       SDNode *oye = N0.getOperand(0).getNode();
9426       if (NarrowLoad.getNode() != N0.getNode()) {
9427         CombineTo(N0.getNode(), NarrowLoad);
9428         // CombineTo deleted the truncate, if needed, but not what's under it.
9429         AddToWorklist(oye);
9430       }
9431       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9432     }
9433
9434     // See if the value being truncated is already sign extended.  If so, just
9435     // eliminate the trunc/sext pair.
9436     SDValue Op = N0.getOperand(0);
9437     unsigned OpBits   = Op.getScalarValueSizeInBits();
9438     unsigned MidBits  = N0.getScalarValueSizeInBits();
9439     unsigned DestBits = VT.getScalarSizeInBits();
9440     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9441
9442     if (OpBits == DestBits) {
9443       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
9444       // bits, it is already ready.
9445       if (NumSignBits > DestBits-MidBits)
9446         return Op;
9447     } else if (OpBits < DestBits) {
9448       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
9449       // bits, just sext from i32.
9450       if (NumSignBits > OpBits-MidBits)
9451         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9452     } else {
9453       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
9454       // bits, just truncate to i32.
9455       if (NumSignBits > OpBits-MidBits)
9456         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9457     }
9458
9459     // fold (sext (truncate x)) -> (sextinreg x).
9460     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9461                                                  N0.getValueType())) {
9462       if (OpBits < DestBits)
9463         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9464       else if (OpBits > DestBits)
9465         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9466       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9467                          DAG.getValueType(N0.getValueType()));
9468     }
9469   }
9470
9471   // Try to simplify (sext (load x)).
9472   if (SDValue foldedExt =
9473           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9474                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9475     return foldedExt;
9476
9477   if (SDValue foldedExt =
9478       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
9479                                ISD::SIGN_EXTEND))
9480     return foldedExt;
9481
9482   // fold (sext (load x)) to multiple smaller sextloads.
9483   // Only on illegal but splittable vectors.
9484   if (SDValue ExtLoad = CombineExtLoad(N))
9485     return ExtLoad;
9486
9487   // Try to simplify (sext (sextload x)).
9488   if (SDValue foldedExt = tryToFoldExtOfExtload(
9489           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9490     return foldedExt;
9491
9492   // fold (sext (and/or/xor (load x), cst)) ->
9493   //      (and/or/xor (sextload x), (sext cst))
9494   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9495        N0.getOpcode() == ISD::XOR) &&
9496       isa<LoadSDNode>(N0.getOperand(0)) &&
9497       N0.getOperand(1).getOpcode() == ISD::Constant &&
9498       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9499     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9500     EVT MemVT = LN00->getMemoryVT();
9501     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9502       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9503       SmallVector<SDNode*, 4> SetCCs;
9504       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9505                                              ISD::SIGN_EXTEND, SetCCs, TLI);
9506       if (DoXform) {
9507         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9508                                          LN00->getChain(), LN00->getBasePtr(),
9509                                          LN00->getMemoryVT(),
9510                                          LN00->getMemOperand());
9511         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9512         Mask = Mask.sext(VT.getSizeInBits());
9513         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9514                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9515         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9516         bool NoReplaceTruncAnd = !N0.hasOneUse();
9517         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9518         CombineTo(N, And);
9519         // If N0 has multiple uses, change other uses as well.
9520         if (NoReplaceTruncAnd) {
9521           SDValue TruncAnd =
9522               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9523           CombineTo(N0.getNode(), TruncAnd);
9524         }
9525         if (NoReplaceTrunc) {
9526           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9527         } else {
9528           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9529                                       LN00->getValueType(0), ExtLoad);
9530           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9531         }
9532         return SDValue(N,0); // Return N so it doesn't get rechecked!
9533       }
9534     }
9535   }
9536
9537   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9538     return V;
9539
9540   if (N0.getOpcode() == ISD::SETCC) {
9541     SDValue N00 = N0.getOperand(0);
9542     SDValue N01 = N0.getOperand(1);
9543     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9544     EVT N00VT = N0.getOperand(0).getValueType();
9545
9546     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9547     // Only do this before legalize for now.
9548     if (VT.isVector() && !LegalOperations &&
9549         TLI.getBooleanContents(N00VT) ==
9550             TargetLowering::ZeroOrNegativeOneBooleanContent) {
9551       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9552       // of the same size as the compared operands. Only optimize sext(setcc())
9553       // if this is the case.
9554       EVT SVT = getSetCCResultType(N00VT);
9555
9556       // If we already have the desired type, don't change it.
9557       if (SVT != N0.getValueType()) {
9558         // We know that the # elements of the results is the same as the
9559         // # elements of the compare (and the # elements of the compare result
9560         // for that matter).  Check to see that they are the same size.  If so,
9561         // we know that the element size of the sext'd result matches the
9562         // element size of the compare operands.
9563         if (VT.getSizeInBits() == SVT.getSizeInBits())
9564           return DAG.getSetCC(DL, VT, N00, N01, CC);
9565
9566         // If the desired elements are smaller or larger than the source
9567         // elements, we can use a matching integer vector type and then
9568         // truncate/sign extend.
9569         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9570         if (SVT == MatchingVecType) {
9571           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9572           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9573         }
9574       }
9575     }
9576
9577     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9578     // Here, T can be 1 or -1, depending on the type of the setcc and
9579     // getBooleanContents().
9580     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9581
9582     // To determine the "true" side of the select, we need to know the high bit
9583     // of the value returned by the setcc if it evaluates to true.
9584     // If the type of the setcc is i1, then the true case of the select is just
9585     // sext(i1 1), that is, -1.
9586     // If the type of the setcc is larger (say, i8) then the value of the high
9587     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9588     // of the appropriate width.
9589     SDValue ExtTrueVal = (SetCCWidth == 1)
9590                              ? DAG.getAllOnesConstant(DL, VT)
9591                              : DAG.getBoolConstant(true, DL, VT, N00VT);
9592     SDValue Zero = DAG.getConstant(0, DL, VT);
9593     if (SDValue SCC =
9594             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9595       return SCC;
9596
9597     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9598       EVT SetCCVT = getSetCCResultType(N00VT);
9599       // Don't do this transform for i1 because there's a select transform
9600       // that would reverse it.
9601       // TODO: We should not do this transform at all without a target hook
9602       // because a sext is likely cheaper than a select?
9603       if (SetCCVT.getScalarSizeInBits() != 1 &&
9604           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9605         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9606         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9607       }
9608     }
9609   }
9610
9611   // fold (sext x) -> (zext x) if the sign bit is known zero.
9612   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9613       DAG.SignBitIsZero(N0))
9614     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9615
9616   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9617     return NewVSel;
9618
9619   // Eliminate this sign extend by doing a negation in the destination type:
9620   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9621   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9622       isNullOrNullSplat(N0.getOperand(0)) &&
9623       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9624       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9625     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9626     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9627   }
9628   // Eliminate this sign extend by doing a decrement in the destination type:
9629   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9630   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9631       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9632       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9633       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9634     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9635     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9636   }
9637
9638   return SDValue();
9639 }
9640
9641 // isTruncateOf - Report whether N acts as a truncate of some other value.
9642 // On success the value being truncated is stored in Op and its known
9643 // zero/one bits in Known. The KnownBits are produced here so that the caller
9644 // does not have to repeat the computeKnownBits query.
9645 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9646                          KnownBits &Known) {
9647   // A real truncate node trivially qualifies.
9648   if (N->getOpcode() == ISD::TRUNCATE) {
9649     Op = N->getOperand(0);
9650     Known = DAG.computeKnownBits(Op);
9651     return true;
9652   }
9653
9654   // Otherwise only a "setcc ne" producing i1 (scalar or vector) can match.
9655   const bool IsI1SetNE =
9656       N.getOpcode() == ISD::SETCC &&
9657       N.getValueType().getScalarType() == MVT::i1 &&
9658       cast<CondCodeSDNode>(N.getOperand(2))->get() == ISD::SETNE;
9659   if (!IsI1SetNE)
9660     return false;
9661
9662   SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
9663   assert(LHS.getValueType() == RHS.getValueType());
9664   // One side has to be zero (or a zero splat); the other becomes Op.
9665   const bool LHSIsZero = isNullOrNullSplat(LHS);
9666   if (!LHSIsZero && !isNullOrNullSplat(RHS))
9667     return false;
9668   Op = LHSIsZero ? RHS : LHS;
9669   // (Op != 0) matches a truncate to i1 only if all bits above bit 0 are zero.
9670   Known = DAG.computeKnownBits(Op);
9671   return (Known.Zero | 1).isAllOnesValue();
9672 }
9673
/// Run the zero-extend combines on N, trying each fold below in source order.
/// Returns the simplified value, SDValue(N, 0) when the DAG was already
/// updated in place through CombineTo, or an empty SDValue when no fold
/// applied.
9674 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9675   SDValue N0 = N->getOperand(0);
9676   EVT VT = N->getValueType(0);
9677
9678   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9679     return Res;
9680
9681   // fold (zext (zext x)) -> (zext x)
9682   // fold (zext (aext x)) -> (zext x)
9683   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9684     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9685                        N0.getOperand(0));
9686
9687   // fold (zext (truncate x)) -> (zext x) or
9688   //      (zext (truncate x)) -> (truncate x)
9689   // This is valid when the truncated bits of x are already zero.
  // isTruncateOf also matches an i1 "setcc ne x, 0" whose operand can only
  // have bit 0 set; Op is the value being (conceptually) truncated.
9690   SDValue Op;
9691   KnownBits Known;
9692   if (isTruncateOf(DAG, N0, Op, Known)) {
    // TruncatedBits marks the bits of Op from N0's width up to the smaller of
    // Op's width and VT's width, i.e. the dropped bits that would reappear in
    // the result. It is empty when Op and N0 have the same width.
9693     APInt TruncatedBits =
9694       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9695       APInt(Op.getScalarValueSizeInBits(), 0) :
9696       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9697                         N0.getScalarValueSizeInBits(),
9698                         std::min(Op.getScalarValueSizeInBits(),
9699                                  VT.getScalarSizeInBits()));
9700     if (TruncatedBits.isSubsetOf(Known.Zero))
9701       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9702   }
9703
9704   // fold (zext (truncate x)) -> (and x, mask)
9705   if (N0.getOpcode() == ISD::TRUNCATE) {
9706     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9707     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9708     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Remember the node under the truncate so it can be queued for another
      // visit once the truncate has been replaced.
9709       SDNode *oye = N0.getOperand(0).getNode();
9710       if (NarrowLoad.getNode() != N0.getNode()) {
9711         CombineTo(N0.getNode(), NarrowLoad);
9712         // CombineTo deleted the truncate, if needed, but not what's under it.
9713         AddToWorklist(oye);
9714       }
9715       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9716     }
9717
9718     EVT SrcVT = N0.getOperand(0).getValueType();
9719     EVT MinVT = N0.getValueType();
9720
9721     // Try to mask before the extension to avoid having to generate a larger mask,
9722     // possibly over several sub-vectors.
9723     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9724       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9725                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9726         SDValue Op = N0.getOperand(0);
9727         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9728         AddToWorklist(Op.getNode());
9729         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9730         // Transfer the debug info; the new node is equivalent to N0.
9731         DAG.transferDbgValues(N0, ZExtOrTrunc);
9732         return ZExtOrTrunc;
9733       }
9734     }
9735
    // General case: bring x to VT first (any-extend or truncate), then clear
    // everything above the truncated width with a zero-extend-in-register.
9736     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9737       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9738       AddToWorklist(Op.getNode());
9739       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9740       // We may safely transfer the debug info describing the truncate node over
9741       // to the equivalent and operation.
9742       DAG.transferDbgValues(N0, And);
9743       return And;
9744     }
9745   }
9746
9747   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9748   // if either of the casts is not free.
9749   if (N0.getOpcode() == ISD::AND &&
9750       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9751       N0.getOperand(1).getOpcode() == ISD::Constant &&
9752       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9753                            N0.getValueType()) ||
9754        !TLI.isZExtFree(N0.getValueType(), VT))) {
9755     SDValue X = N0.getOperand(0).getOperand(0);
9756     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
    // Zero-extend the mask constant to the width of VT.
9757     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9758     Mask = Mask.zext(VT.getSizeInBits());
9759     SDLoc DL(N);
9760     return DAG.getNode(ISD::AND, DL, VT,
9761                        X, DAG.getConstant(Mask, DL, VT));
9762   }
9763
9764   // Try to simplify (zext (load x)).
9765   if (SDValue foldedExt =
9766           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9767                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9768     return foldedExt;
9769
9770   if (SDValue foldedExt =
9771       tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
9772                                ISD::ZERO_EXTEND))
9773     return foldedExt;
9774
9775   // fold (zext (load x)) to multiple smaller zextloads.
9776   // Only on illegal but splittable vectors.
9777   if (SDValue ExtLoad = CombineExtLoad(N))
9778     return ExtLoad;
9779
9780   // fold (zext (and/or/xor (load x), cst)) ->
9781   //      (and/or/xor (zextload x), (zext cst))
9782   // Unless (and (load x) cst) will match as a zextload already and has
9783   // additional users.
  // NOTE(review): the last condition uses `&&`, whereas the other legality
  // checks in this function are written `!LegalOperations || isLegal(...)`.
  // As written the fold only runs before operation legalization and only when
  // the logic op is legal in VT - confirm this is intended.
9784   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9785        N0.getOpcode() == ISD::XOR) &&
9786       isa<LoadSDNode>(N0.getOperand(0)) &&
9787       N0.getOperand(1).getOpcode() == ISD::Constant &&
9788       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9789     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9790     EVT MemVT = LN00->getMemoryVT();
9791     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9792         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9793       bool DoXform = true;
9794       SmallVector<SDNode*, 4> SetCCs;
      // With extra users of the AND, skip the fold when isAndLoadExtLoad
      // reports that (and (load x), cst) already forms an extending load.
9795       if (!N0.hasOneUse()) {
9796         if (N0.getOpcode() == ISD::AND) {
9797           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9798           EVT LoadResultTy = AndC->getValueType(0);
9799           EVT ExtVT;
9800           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9801             DoXform = false;
9802         }
9803       }
9804       if (DoXform)
9805         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9806                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9807       if (DoXform) {
9808         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9809                                          LN00->getChain(), LN00->getBasePtr(),
9810                                          LN00->getMemoryVT(),
9811                                          LN00->getMemOperand());
9812         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9813         Mask = Mask.zext(VT.getSizeInBits());
9814         SDLoc DL(N);
9815         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9816                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9817         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
        // Sample the use counts before the CombineTo below; both flags are
        // consulted afterwards. NoReplaceTruncAnd is set when N0 has users
        // besides N, NoReplaceTrunc when the loaded value has a single use.
9818         bool NoReplaceTruncAnd = !N0.hasOneUse();
9819         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9820         CombineTo(N, And);
9821         // If N0 has multiple uses, change other uses as well.
9822         if (NoReplaceTruncAnd) {
9823           SDValue TruncAnd =
9824               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9825           CombineTo(N0.getNode(), TruncAnd);
9826         }
9827         if (NoReplaceTrunc) {
          // Only the chain result of the old load needs to be redirected.
9828           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9829         } else {
          // Other users of the loaded value get a truncate of the wide load.
9830           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9831                                       LN00->getValueType(0), ExtLoad);
9832           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9833         }
9834         return SDValue(N,0); // Return N so it doesn't get rechecked!
9835       }
9836     }
9837   }
9838
9839   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9840   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9841   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9842     return ZExtLoad;
9843
9844   // Try to simplify (zext (zextload x)).
9845   if (SDValue foldedExt = tryToFoldExtOfExtload(
9846           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9847     return foldedExt;
9848
9849   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9850     return V;
9851
9852   if (N0.getOpcode() == ISD::SETCC) {
9853     // Only do this before legalize for now.
9854     if (!LegalOperations && VT.isVector() &&
9855         N0.getValueType().getVectorElementType() == MVT::i1) {
9856       EVT N00VT = N0.getOperand(0).getValueType();
      // The setcc already has the type getSetCCResultType reports for its
      // operands, so leave it alone. This returns from the whole visit; none
      // of the folds further down are tried.
9857       if (getSetCCResultType(N00VT) == N0.getValueType())
9858         return SDValue();
9859
9860       // We know that the # elements of the results is the same as the #
9861       // elements of the compare (and the # elements of the compare result for
9862       // that matter). Check to see that they are the same size. If so, we know
9863       // that the element size of the zext'd result matches the element size of
9864       // the compare operands.
9865       SDLoc DL(N);
9866       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9867       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9868         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9869         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9870                                      N0.getOperand(1), N0.getOperand(2));
9871         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9872       }
9873
9874       // If the desired elements are smaller or larger than the source
9875       // elements we can use a matching integer vector type and then
9876       // truncate/sign extend; the AND with 1 then keeps only bit 0 per element.
9877       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9878       SDValue VsetCC =
9879           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9880                       N0.getOperand(1), N0.getOperand(2));
9881       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9882                          VecOnes);
9883     }
9884
9885     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9886     SDLoc DL(N);
9887     if (SDValue SCC = SimplifySelectCC(
9888             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9889             DAG.getConstant(0, DL, VT),
9890             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9891       return SCC;
9892   }
9893
9894   // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
9895   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9896       isa<ConstantSDNode>(N0.getOperand(1)) &&
9897       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9898       N0.hasOneUse()) {
9899     SDValue ShAmt = N0.getOperand(1);
9900     if (N0.getOpcode() == ISD::SHL) {
9901       SDValue InnerZExt = N0.getOperand(0);
9902       // If the original shl may be shifting out bits, do not perform this
9903       // transformation.
      // KnownZeroBits is the number of high bits added by the inner zext; a
      // larger shift amount bails out of the whole visit.
9904       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
9905         InnerZExt.getOperand(0).getValueSizeInBits();
9906       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
9907         return SDValue();
9908     }
9909
9910     SDLoc DL(N);
9911
9912     // Ensure that the shift amount is wide enough for the shifted value.
    // NOTE(review): the fixed 256-bit threshold and the unconditional zext to
    // i32 presumably assume an 8-bit shift-amount type; confirm this holds
    // when ShAmt is already i32 or wider.
9913     if (VT.getSizeInBits() >= 256)
9914       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
9915
9916     return DAG.getNode(N0.getOpcode(), DL, VT,
9917                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
9918                        ShAmt);
9919   }
9920
9921   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9922     return NewVSel;
9923
9924   return SDValue();
9925 }
9926
/// Run the any-extend combines on N, trying each fold below in source order.
/// Returns the simplified value, SDValue(N, 0) when the DAG was already
/// updated in place through CombineTo, or an empty SDValue when no fold
/// applied.
9927 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
9928   SDValue N0 = N->getOperand(0);
9929   EVT VT = N->getValueType(0);
9930
9931   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9932     return Res;
9933
9934   // fold (aext (aext x)) -> (aext x)
9935   // fold (aext (zext x)) -> (zext x)
9936   // fold (aext (sext x)) -> (sext x)
9937   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
9938       N0.getOpcode() == ISD::ZERO_EXTEND ||
9939       N0.getOpcode() == ISD::SIGN_EXTEND)
9940     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
9941
9942   // fold (aext (truncate (load x))) -> (aext (smaller load x))
9943   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
9944   if (N0.getOpcode() == ISD::TRUNCATE) {
9945     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      // Remember the node under the truncate so it can be queued for another
      // visit once the truncate has been replaced.
9946       SDNode *oye = N0.getOperand(0).getNode();
9947       if (NarrowLoad.getNode() != N0.getNode()) {
9948         CombineTo(N0.getNode(), NarrowLoad);
9949         // CombineTo deleted the truncate, if needed, but not what's under it.
9950         AddToWorklist(oye);
9951       }
9952       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9953     }
9954   }
9955
9956   // fold (aext (truncate x)) -> x any-extended or truncated to VT
9957   if (N0.getOpcode() == ISD::TRUNCATE)
9958     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9959
9960   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
9961   // if the trunc is not free.
9962   if (N0.getOpcode() == ISD::AND &&
9963       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9964       N0.getOperand(1).getOpcode() == ISD::Constant &&
9965       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9966                           N0.getValueType())) {
9967     SDLoc DL(N);
9968     SDValue X = N0.getOperand(0).getOperand(0);
9969     X = DAG.getAnyExtOrTrunc(X, DL, VT);
    // Zero-extend the mask constant to the width of VT.
9970     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9971     Mask = Mask.zext(VT.getSizeInBits());
9972     return DAG.getNode(ISD::AND, DL, VT,
9973                        X, DAG.getConstant(Mask, DL, VT));
9974   }
9975
9976   // fold (aext (load x)) -> (aext (truncate (extload x)))
9977   // None of the supported targets knows how to perform load and any_ext
9978   // on vectors in one instruction.  We only perform this transformation on
9979   // scalars.
9980   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
9981       ISD::isUNINDEXEDLoad(N0.getNode()) &&
9982       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
9983     bool DoXform = true;
9984     SmallVector<SDNode*, 4> SetCCs;
    // With several users of the load, ExtendUsesToFormExtLoad decides whether
    // the fold goes ahead and collects the setcc users in SetCCs.
9985     if (!N0.hasOneUse())
9986       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
9987                                         TLI);
9988     if (DoXform) {
9989       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9990       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
9991                                        LN0->getChain(),
9992                                        LN0->getBasePtr(), N0.getValueType(),
9993                                        LN0->getMemOperand());
9994       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
9995       // If the load value is used only by N, replace it via CombineTo N.
      // The use count is sampled before CombineTo and consulted afterwards.
9996       bool NoReplaceTrunc = N0.hasOneUse();
9997       CombineTo(N, ExtLoad);
9998       if (NoReplaceTrunc) {
        // Only the chain result of the old load needs to be redirected.
9999         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10000         recursivelyDeleteUnusedNodes(LN0);
10001       } else {
        // Other users of the loaded value get a truncate of the wide load.
10002         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10003                                     N0.getValueType(), ExtLoad);
10004         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10005       }
10006       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10007     }
10008   }
10009
10010   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10011   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10012   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  // In effect the single-use extending load is re-issued with the same
  // extension kind and memory type, but producing VT directly.
10013   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10014       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10015     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10016     ISD::LoadExtType ExtType = LN0->getExtensionType();
10017     EVT MemVT = LN0->getMemoryVT();
10018     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10019       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10020                                        VT, LN0->getChain(), LN0->getBasePtr(),
10021                                        MemVT, LN0->getMemOperand());
10022       CombineTo(N, ExtLoad);
10023       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10024       recursivelyDeleteUnusedNodes(LN0);
10025       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10026     }
10027   }
10028
10029   if (N0.getOpcode() == ISD::SETCC) {
10030     // For vectors:
10031     // aext(setcc) -> vsetcc
10032     // aext(setcc) -> truncate(vsetcc)
10033     // aext(setcc) -> aext(vsetcc)
10034     // Only do this before legalize for now.
10035     if (VT.isVector() && !LegalOperations) {
10036       EVT N00VT = N0.getOperand(0).getValueType();
      // The setcc already has the type getSetCCResultType reports for its
      // operands, so leave it alone. This returns from the whole visit; the
      // select_cc fold below is not tried.
10037       if (getSetCCResultType(N00VT) == N0.getValueType())
10038         return SDValue();
10039
10040       // We know that the # elements of the results is the same as the
10041       // # elements of the compare (and the # elements of the compare result
10042       // for that matter).  Check to see that they are the same size.  If so,
10043       // we know that the element size of the aext'd result matches the
10044       // element size of the compare operands.
10045       if (VT.getSizeInBits() == N00VT.getSizeInBits())
10046         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10047                              N0.getOperand(1),
10048                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
10049
10050       // If the desired elements are smaller or larger than the source
10051       // elements we can use a matching integer vector type and then
10052       // truncate/any extend
10053       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10054       SDValue VsetCC =
10055         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10056                       N0.getOperand(1),
10057                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
10058       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10059     }
10060
10061     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10062     SDLoc DL(N);
10063     if (SDValue SCC = SimplifySelectCC(
10064             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10065             DAG.getConstant(0, DL, VT),
10066             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10067       return SCC;
10068   }
10069
10070   return SDValue();
10071 }
10072
10073 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10074   const unsigned Opcode = N->getOpcode();
10075   SDValue Src = N->getOperand(0);
10076   SDValue VTOp = N->getOperand(1);
10077   EVT AssertVT = cast<VTSDNode>(VTOp)->getVT();
10078
10079   // Asserting the same thing twice adds nothing:
10080   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10081   if (Src.getOpcode() == Opcode &&
10082       AssertVT == cast<VTSDNode>(Src.getOperand(1))->getVT())
10083     return Src;
10084
10085   // Everything below looks through a single-use truncate only.
10086   if (Src.getOpcode() != ISD::TRUNCATE || !Src.hasOneUse())
10087     return SDValue();
10088
10089   SDValue Wide = Src.getOperand(0);
10090   const unsigned WideOpc = Wide.getOpcode();
10091
10092   // Case 1: assert, truncate, assert sandwich using the same assert opcode.
10093   const bool SameKind = WideOpc == Opcode;
10094   // Case 2: (AssertZext (truncate (AssertSext X, iX)), iY).
10095   const bool ZextOverSext =
10096       WideOpc == ISD::AssertSext && Opcode == ISD::AssertZext;
10097   if (!SameKind && !ZextOverSext)
10098     return SDValue();
10099
10100   EVT WideAssertVT = cast<VTSDNode>(Wide.getOperand(1))->getVT();
10101   assert(WideAssertVT.bitsLE(Src.getValueType()) &&
10102          "Asserting zero/sign-extended bits to a type larger than the "
10103          "truncated destination does not provide information");
10104
10105   // Pick the type operand for the assert that is rebuilt on the wide value.
10106   SDValue NewVTOp;
10107   if (SameKind) {
10108     // Make one stronger assert by asserting the smaller of the two asserted
10109     // types on the larger source type. This eliminates the later assert:
10110     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10111     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10112     NewVTOp = DAG.getValueType(AssertVT.bitsLT(WideAssertVT) ? AssertVT
10113                                                              : WideAssertVT);
10114   } else if (AssertVT.bitsLT(WideAssertVT)) {
10115     // Y is smaller than X: move the AssertZext in front of the truncate and
10116     // drop the AssertSext.
10117     NewVTOp = VTOp;
10118   } else {
10119     return SDValue();
10120   }
10121
10122   SDLoc DL(N);
10123   SDValue NewAssert = DAG.getNode(Opcode, DL, Wide.getValueType(),
10124                                   Wide.getOperand(0), NewVTOp);
10125   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10126 }
10127
10128 /// If the result of a wider load is shifted right by N bits and then
10129 /// truncated to a narrower type, where N is a multiple of the number of bits
10130 /// of the narrower type, transform it to a narrower load from address + N /
10131 /// num of bits of new type. Also narrow the load if the result is masked with
10132 /// an AND to effectively produce a smaller type. If the result is to be
10133 /// extended, also fold the extension to form an extending load.
10134 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10135   unsigned Opc = N->getOpcode();
10136
10137   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10138   SDValue N0 = N->getOperand(0);
10139   EVT VT = N->getValueType(0);
10140   EVT ExtVT = VT;
10141
10142   // This transformation isn't valid for vector loads.
10143   if (VT.isVector())
10144     return SDValue();
10145
10146   unsigned ShAmt = 0;
10147   bool HasShiftedOffset = false;
10148   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10149   // extended to VT.
10150   if (Opc == ISD::SIGN_EXTEND_INREG) {
10151     ExtType = ISD::SEXTLOAD;
10152     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10153   } else if (Opc == ISD::SRL) {
10154     // Another special-case: SRL is basically zero-extending a narrower value,
10155     // or it maybe shifting a higher subword, half or byte into the lowest
10156     // bits.
10157     ExtType = ISD::ZEXTLOAD;
10158     N0 = SDValue(N, 0);
10159
10160     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10161     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10162     if (!N01 || !LN0)
10163       return SDValue();
10164
10165     uint64_t ShiftAmt = N01->getZExtValue();
10166     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10167     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10168       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10169     else
10170       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10171                                 VT.getSizeInBits() - ShiftAmt);
10172   } else if (Opc == ISD::AND) {
10173     // An AND with a constant mask is the same as a truncate + zero-extend.
10174     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10175     if (!AndC)
10176       return SDValue();
10177
10178     const APInt &Mask = AndC->getAPIntValue();
10179     unsigned ActiveBits = 0;
10180     if (Mask.isMask()) {
10181       ActiveBits = Mask.countTrailingOnes();
10182     } else if (Mask.isShiftedMask()) {
10183       ShAmt = Mask.countTrailingZeros();
10184       APInt ShiftedMask = Mask.lshr(ShAmt);
10185       ActiveBits = ShiftedMask.countTrailingOnes();
10186       HasShiftedOffset = true;
10187     } else
10188       return SDValue();
10189
10190     ExtType = ISD::ZEXTLOAD;
10191     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10192   }
10193
10194   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10195     SDValue SRL = N0;
10196     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10197       ShAmt = ConstShift->getZExtValue();
10198       unsigned EVTBits = ExtVT.getSizeInBits();
10199       // Is the shift amount a multiple of size of VT?
10200       if ((ShAmt & (EVTBits-1)) == 0) {
10201         N0 = N0.getOperand(0);
10202         // Is the load width a multiple of size of VT?
10203         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10204           return SDValue();
10205       }
10206
10207       // At this point, we must have a load or else we can't do the transform.
10208       if (!isa<LoadSDNode>(N0)) return SDValue();
10209
10210       auto *LN0 = cast<LoadSDNode>(N0);
10211
10212       // Because a SRL must be assumed to *need* to zero-extend the high bits
10213       // (as opposed to anyext the high bits), we can't combine the zextload
10214       // lowering of SRL and an sextload.
10215       if (LN0->getExtensionType() == ISD::SEXTLOAD)
10216         return SDValue();
10217
10218       // If the shift amount is larger than the input type then we're not
10219       // accessing any of the loaded bytes.  If the load was a zextload/extload
10220       // then the result of the shift+trunc is zero/undef (handled elsewhere).
10221       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10222         return SDValue();
10223
10224       // If the SRL is only used by a masking AND, we may be able to adjust
10225       // the ExtVT to make the AND redundant.
10226       SDNode *Mask = *(SRL->use_begin());
10227       if (Mask->getOpcode() == ISD::AND &&
10228           isa<ConstantSDNode>(Mask->getOperand(1))) {
10229         const APInt &ShiftMask =
10230           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10231         if (ShiftMask.isMask()) {
10232           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10233                                            ShiftMask.countTrailingOnes());
10234           // If the mask is smaller, recompute the type.
10235           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10236               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10237             ExtVT = MaskedVT;
10238         }
10239       }
10240     }
10241   }
10242
10243   // If the load is shifted left (and the result isn't shifted back right),
10244   // we can fold the truncate through the shift.
10245   unsigned ShLeftAmt = 0;
10246   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10247       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10248     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10249       ShLeftAmt = N01->getZExtValue();
10250       N0 = N0.getOperand(0);
10251     }
10252   }
10253
10254   // If we haven't found a load, we can't narrow it.
10255   if (!isa<LoadSDNode>(N0))
10256     return SDValue();
10257
10258   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10259   // Reducing the width of a volatile load is illegal.  For atomics, we may be
10260   // able to reduce the width provided we never widen again. (see D66309)
10261   if (!LN0->isSimple() ||
10262       !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10263     return SDValue();
10264
10265   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10266     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10267     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10268     return LVTStoreBits - EVTStoreBits - ShAmt;
10269   };
10270
10271   // For big endian targets, we need to adjust the offset to the pointer to
10272   // load the correct bytes.
10273   if (DAG.getDataLayout().isBigEndian())
10274     ShAmt = AdjustBigEndianShift(ShAmt);
10275
10276   EVT PtrType = N0.getOperand(1).getValueType();
10277   uint64_t PtrOff = ShAmt / 8;
10278   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10279   SDLoc DL(LN0);
10280   // The original load itself didn't wrap, so an offset within it doesn't.
10281   SDNodeFlags Flags;
10282   Flags.setNoUnsignedWrap(true);
10283   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10284                                PtrType, LN0->getBasePtr(),
10285                                DAG.getConstant(PtrOff, DL, PtrType),
10286                                Flags);
10287   AddToWorklist(NewPtr.getNode());
10288
10289   SDValue Load;
10290   if (ExtType == ISD::NON_EXTLOAD)
10291     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10292                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10293                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10294   else
10295     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10296                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10297                           NewAlign, LN0->getMemOperand()->getFlags(),
10298                           LN0->getAAInfo());
10299
10300   // Replace the old load's chain with the new load's chain.
10301   WorklistRemover DeadNodes(*this);
10302   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10303
10304   // Shift the result left, if we've swallowed a left shift.
10305   SDValue Result = Load;
10306   if (ShLeftAmt != 0) {
10307     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10308     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10309       ShImmTy = VT;
10310     // If the shift amount is as large as the result size (but, presumably,
10311     // no larger than the source) then the useful bits of the result are
10312     // zero; we can't simply return the shortened shift, because the result
10313     // of that operation is undefined.
10314     if (ShLeftAmt >= VT.getSizeInBits())
10315       Result = DAG.getConstant(0, DL, VT);
10316     else
10317       Result = DAG.getNode(ISD::SHL, DL, VT,
10318                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10319   }
10320
10321   if (HasShiftedOffset) {
10322     // Recalculate the shift amount after it has been altered to calculate
10323     // the offset.
10324     if (DAG.getDataLayout().isBigEndian())
10325       ShAmt = AdjustBigEndianShift(ShAmt);
10326
10327     // We're using a shifted mask, so the load now has an offset. This means
10328     // that data has been loaded into the lower bytes than it would have been
10329     // before, so we need to shl the loaded data into the correct position in the
10330     // register.
10331     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10332     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10333     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10334   }
10335
10336   // Return the new loaded value.
10337   return Result;
10338 }
10339
10340 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10341   SDValue N0 = N->getOperand(0);
10342   SDValue N1 = N->getOperand(1);
10343   EVT VT = N->getValueType(0);
10344   EVT EVT = cast<VTSDNode>(N1)->getVT();
10345   unsigned VTBits = VT.getScalarSizeInBits();
10346   unsigned EVTBits = EVT.getScalarSizeInBits();
10347
10348   if (N0.isUndef())
10349     return DAG.getUNDEF(VT);
10350
10351   // fold (sext_in_reg c1) -> c1
10352   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10353     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10354
10355   // If the input is already sign extended, just drop the extension.
10356   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10357     return N0;
10358
10359   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10360   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10361       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10362     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10363                        N0.getOperand(0), N1);
10364
10365   // fold (sext_in_reg (sext x)) -> (sext x)
10366   // fold (sext_in_reg (aext x)) -> (sext x)
10367   // if x is small enough or if we know that x has more than 1 sign bit and the
10368   // sign_extend_inreg is extending from one of them.
10369   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10370     SDValue N00 = N0.getOperand(0);
10371     unsigned N00Bits = N00.getScalarValueSizeInBits();
10372     if ((N00Bits <= EVTBits ||
10373          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10374         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10375       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10376   }
10377
10378   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10379   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10380        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10381        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10382       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10383     if (!LegalOperations ||
10384         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10385       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10386                          N0.getOperand(0));
10387   }
10388
10389   // fold (sext_in_reg (zext x)) -> (sext x)
10390   // iff we are extending the source sign bit.
10391   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10392     SDValue N00 = N0.getOperand(0);
10393     if (N00.getScalarValueSizeInBits() == EVTBits &&
10394         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10395       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10396   }
10397
10398   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10399   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10400     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10401
10402   // fold operands of sext_in_reg based on knowledge that the top bits are not
10403   // demanded.
10404   if (SimplifyDemandedBits(SDValue(N, 0)))
10405     return SDValue(N, 0);
10406
10407   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10408   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10409   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10410     return NarrowLoad;
10411
10412   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10413   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10414   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10415   if (N0.getOpcode() == ISD::SRL) {
10416     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10417       if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10418         // We can turn this into an SRA iff the input to the SRL is already sign
10419         // extended enough.
10420         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10421         if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10422           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10423                              N0.getOperand(1));
10424       }
10425   }
10426
10427   // fold (sext_inreg (extload x)) -> (sextload x)
10428   // If sextload is not supported by target, we can only do the combine when
10429   // load has one use. Doing otherwise can block folding the extload with other
10430   // extends that the target does support.
10431   if (ISD::isEXTLoad(N0.getNode()) &&
10432       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10433       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10434       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
10435         N0.hasOneUse()) ||
10436        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10437     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10438     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10439                                      LN0->getChain(),
10440                                      LN0->getBasePtr(), EVT,
10441                                      LN0->getMemOperand());
10442     CombineTo(N, ExtLoad);
10443     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10444     AddToWorklist(ExtLoad.getNode());
10445     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10446   }
10447   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10448   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10449       N0.hasOneUse() &&
10450       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10451       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) &&
10452        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10453     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10454     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10455                                      LN0->getChain(),
10456                                      LN0->getBasePtr(), EVT,
10457                                      LN0->getMemOperand());
10458     CombineTo(N, ExtLoad);
10459     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10460     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10461   }
10462
10463   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10464   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10465     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10466                                            N0.getOperand(1), false))
10467       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10468                          BSwap, N1);
10469   }
10470
10471   return SDValue();
10472 }
10473
10474 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10475   SDValue N0 = N->getOperand(0);
10476   EVT VT = N->getValueType(0);
10477
10478   if (N0.isUndef())
10479     return DAG.getUNDEF(VT);
10480
10481   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10482     return Res;
10483
10484   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10485     return SDValue(N, 0);
10486
10487   return SDValue();
10488 }
10489
10490 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10491   SDValue N0 = N->getOperand(0);
10492   EVT VT = N->getValueType(0);
10493
10494   if (N0.isUndef())
10495     return DAG.getUNDEF(VT);
10496
10497   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10498     return Res;
10499
10500   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10501     return SDValue(N, 0);
10502
10503   return SDValue();
10504 }
10505
10506 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10507   SDValue N0 = N->getOperand(0);
10508   EVT VT = N->getValueType(0);
10509   EVT SrcVT = N0.getValueType();
10510   bool isLE = DAG.getDataLayout().isLittleEndian();
10511
10512   // noop truncate
10513   if (SrcVT == VT)
10514     return N0;
10515
10516   // fold (truncate (truncate x)) -> (truncate x)
10517   if (N0.getOpcode() == ISD::TRUNCATE)
10518     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10519
10520   // fold (truncate c1) -> c1
10521   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10522     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10523     if (C.getNode() != N)
10524       return C;
10525   }
10526
10527   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10528   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10529       N0.getOpcode() == ISD::SIGN_EXTEND ||
10530       N0.getOpcode() == ISD::ANY_EXTEND) {
10531     // if the source is smaller than the dest, we still need an extend.
10532     if (N0.getOperand(0).getValueType().bitsLT(VT))
10533       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10534     // if the source is larger than the dest, than we just need the truncate.
10535     if (N0.getOperand(0).getValueType().bitsGT(VT))
10536       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10537     // if the source and dest are the same type, we can drop both the extend
10538     // and the truncate.
10539     return N0.getOperand(0);
10540   }
10541
10542   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10543   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10544     return SDValue();
10545
10546   // Fold extract-and-trunc into a narrow extract. For example:
10547   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10548   //   i32 y = TRUNCATE(i64 x)
10549   //        -- becomes --
10550   //   v16i8 b = BITCAST (v2i64 val)
10551   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10552   //
10553   // Note: We only run this optimization after type legalization (which often
10554   // creates this pattern) and before operation legalization after which
10555   // we need to be more careful about the vector instructions that we generate.
10556   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10557       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10558     EVT VecTy = N0.getOperand(0).getValueType();
10559     EVT ExTy = N0.getValueType();
10560     EVT TrTy = N->getValueType(0);
10561
10562     unsigned NumElem = VecTy.getVectorNumElements();
10563     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10564
10565     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10566     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10567
10568     SDValue EltNo = N0->getOperand(1);
10569     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10570       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10571       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10572       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10573
10574       SDLoc DL(N);
10575       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10576                          DAG.getBitcast(NVT, N0.getOperand(0)),
10577                          DAG.getConstant(Index, DL, IndexTy));
10578     }
10579   }
10580
10581   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10582   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10583     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10584         TLI.isTruncateFree(SrcVT, VT)) {
10585       SDLoc SL(N0);
10586       SDValue Cond = N0.getOperand(0);
10587       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10588       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10589       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10590     }
10591   }
10592
10593   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
10594   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10595       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10596       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10597     SDValue Amt = N0.getOperand(1);
10598     KnownBits Known = DAG.computeKnownBits(Amt);
10599     unsigned Size = VT.getScalarSizeInBits();
10600     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10601       SDLoc SL(N);
10602       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10603
10604       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10605       if (AmtVT != Amt.getValueType()) {
10606         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10607         AddToWorklist(Amt.getNode());
10608       }
10609       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10610     }
10611   }
10612
10613   // Attempt to pre-truncate BUILD_VECTOR sources.
10614   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10615       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10616     SDLoc DL(N);
10617     EVT SVT = VT.getScalarType();
10618     SmallVector<SDValue, 8> TruncOps;
10619     for (const SDValue &Op : N0->op_values()) {
10620       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10621       TruncOps.push_back(TruncOp);
10622     }
10623     return DAG.getBuildVector(VT, DL, TruncOps);
10624   }
10625
10626   // Fold a series of buildvector, bitcast, and truncate if possible.
10627   // For example fold
10628   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10629   //   (2xi32 (buildvector x, y)).
10630   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10631       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10632       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10633       N0.getOperand(0).hasOneUse()) {
10634     SDValue BuildVect = N0.getOperand(0);
10635     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10636     EVT TruncVecEltTy = VT.getVectorElementType();
10637
10638     // Check that the element types match.
10639     if (BuildVectEltTy == TruncVecEltTy) {
10640       // Now we only need to compute the offset of the truncated elements.
10641       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
10642       unsigned TruncVecNumElts = VT.getVectorNumElements();
10643       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10644
10645       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10646              "Invalid number of elements");
10647
10648       SmallVector<SDValue, 8> Opnds;
10649       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10650         Opnds.push_back(BuildVect.getOperand(i));
10651
10652       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10653     }
10654   }
10655
10656   // See if we can simplify the input to this truncate through knowledge that
10657   // only the low bits are being used.
10658   // For example "trunc (or (shl x, 8), y)" // -> trunc y
10659   // Currently we only perform this optimization on scalars because vectors
10660   // may have different active low bits.
10661   if (!VT.isVector()) {
10662     APInt Mask =
10663         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10664     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10665       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10666   }
10667
10668   // fold (truncate (load x)) -> (smaller load x)
10669   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10670   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10671     if (SDValue Reduced = ReduceLoadWidth(N))
10672       return Reduced;
10673
10674     // Handle the case where the load remains an extending load even
10675     // after truncation.
10676     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10677       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10678       if (LN0->isSimple() &&
10679           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10680         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10681                                          VT, LN0->getChain(), LN0->getBasePtr(),
10682                                          LN0->getMemoryVT(),
10683                                          LN0->getMemOperand());
10684         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10685         return NewLoad;
10686       }
10687     }
10688   }
10689
10690   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10691   // where ... are all 'undef'.
10692   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10693     SmallVector<EVT, 8> VTs;
10694     SDValue V;
10695     unsigned Idx = 0;
10696     unsigned NumDefs = 0;
10697
10698     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10699       SDValue X = N0.getOperand(i);
10700       if (!X.isUndef()) {
10701         V = X;
10702         Idx = i;
10703         NumDefs++;
10704       }
10705       // Stop if more than one members are non-undef.
10706       if (NumDefs > 1)
10707         break;
10708       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10709                                      VT.getVectorElementType(),
10710                                      X.getValueType().getVectorNumElements()));
10711     }
10712
10713     if (NumDefs == 0)
10714       return DAG.getUNDEF(VT);
10715
10716     if (NumDefs == 1) {
10717       assert(V.getNode() && "The single defined operand is empty!");
10718       SmallVector<SDValue, 8> Opnds;
10719       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10720         if (i != Idx) {
10721           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10722           continue;
10723         }
10724         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10725         AddToWorklist(NV.getNode());
10726         Opnds.push_back(NV);
10727       }
10728       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10729     }
10730   }
10731
10732   // Fold truncate of a bitcast of a vector to an extract of the low vector
10733   // element.
10734   //
10735   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10736   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10737     SDValue VecSrc = N0.getOperand(0);
10738     EVT SrcVT = VecSrc.getValueType();
10739     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10740         (!LegalOperations ||
10741          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10742       SDLoc SL(N);
10743
10744       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10745       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10746       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10747                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10748     }
10749   }
10750
10751   // Simplify the operands using demanded-bits information.
10752   if (!VT.isVector() &&
10753       SimplifyDemandedBits(SDValue(N, 0)))
10754     return SDValue(N, 0);
10755
10756   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10757   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10758   // When the adde's carry is not used.
10759   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10760       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10761       // We only do for addcarry before legalize operation
10762       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10763        TLI.isOperationLegal(N0.getOpcode(), VT))) {
10764     SDLoc SL(N);
10765     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10766     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10767     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10768     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10769   }
10770
10771   // fold (truncate (extract_subvector(ext x))) ->
10772   //      (extract_subvector x)
10773   // TODO: This can be generalized to cover cases where the truncate and extract
10774   // do not fully cancel each other out.
10775   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10776     SDValue N00 = N0.getOperand(0);
10777     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10778         N00.getOpcode() == ISD::ZERO_EXTEND ||
10779         N00.getOpcode() == ISD::ANY_EXTEND) {
10780       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10781           VT.getVectorElementType())
10782         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10783                            N00.getOperand(0), N0.getOperand(1));
10784     }
10785   }
10786
10787   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10788     return NewVSel;
10789
10790   // Narrow a suitable binary operation with a non-opaque constant operand by
10791   // moving it ahead of the truncate. This is limited to pre-legalization
10792   // because targets may prefer a wider type during later combines and invert
10793   // this transform.
10794   switch (N0.getOpcode()) {
10795   case ISD::ADD:
10796   case ISD::SUB:
10797   case ISD::MUL:
10798   case ISD::AND:
10799   case ISD::OR:
10800   case ISD::XOR:
10801     if (!LegalOperations && N0.hasOneUse() &&
10802         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10803          isConstantOrConstantVector(N0.getOperand(1), true))) {
10804       // TODO: We already restricted this to pre-legalization, but for vectors
10805       // we are extra cautious to not create an unsupported operation.
10806       // Target-specific changes are likely needed to avoid regressions here.
10807       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10808         SDLoc DL(N);
10809         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10810         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10811         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10812       }
10813     }
10814   }
10815
10816   return SDValue();
10817 }
10818
10819 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10820   SDValue Elt = N->getOperand(i);
10821   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10822     return Elt.getNode();
10823   return Elt.getOperand(Elt.getResNo()).getNode();
10824 }
10825
10826 /// build_pair (load, load) -> load
10827 /// if load locations are consecutive.
10828 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10829   assert(N->getOpcode() == ISD::BUILD_PAIR);
10830
10831   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10832   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10833
10834   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10835   // most significant part in elt 1. So when combining into one large load, we
10836   // need to consider the endianness.
10837   if (DAG.getDataLayout().isBigEndian())
10838     std::swap(LD1, LD2);
10839
10840   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10841       LD1->getAddressSpace() != LD2->getAddressSpace())
10842     return SDValue();
10843   EVT LD1VT = LD1->getValueType(0);
10844   unsigned LD1Bytes = LD1VT.getStoreSize();
10845   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10846       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10847     unsigned Align = LD1->getAlignment();
10848     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10849         VT.getTypeForEVT(*DAG.getContext()));
10850
10851     if (NewAlign <= Align &&
10852         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10853       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10854                          LD1->getPointerInfo(), Align);
10855   }
10856
10857   return SDValue();
10858 }
10859
10860 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10861   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10862   // and Lo parts; on big-endian machines it doesn't.
10863   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10864 }
10865
10866 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10867                                     const TargetLowering &TLI) {
10868   // If this is not a bitcast to an FP type or if the target doesn't have
10869   // IEEE754-compliant FP logic, we're done.
10870   EVT VT = N->getValueType(0);
10871   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10872     return SDValue();
10873
10874   // TODO: Handle cases where the integer constant is a different scalar
10875   // bitwidth to the FP.
10876   SDValue N0 = N->getOperand(0);
10877   EVT SourceVT = N0.getValueType();
10878   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10879     return SDValue();
10880
10881   unsigned FPOpcode;
10882   APInt SignMask;
10883   switch (N0.getOpcode()) {
10884   case ISD::AND:
10885     FPOpcode = ISD::FABS;
10886     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10887     break;
10888   case ISD::XOR:
10889     FPOpcode = ISD::FNEG;
10890     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10891     break;
10892   case ISD::OR:
10893     FPOpcode = ISD::FABS;
10894     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10895     break;
10896   default:
10897     return SDValue();
10898   }
10899
10900   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
10901   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
10902   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
10903   //   fneg (fabs X)
10904   SDValue LogicOp0 = N0.getOperand(0);
10905   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
10906   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
10907       LogicOp0.getOpcode() == ISD::BITCAST &&
10908       LogicOp0.getOperand(0).getValueType() == VT) {
10909     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
10910     NumFPLogicOpsConv++;
10911     if (N0.getOpcode() == ISD::OR)
10912       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
10913     return FPOp;
10914   }
10915
10916   return SDValue();
10917 }
10918
10919 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
10920   SDValue N0 = N->getOperand(0);
10921   EVT VT = N->getValueType(0);
10922
10923   if (N0.isUndef())
10924     return DAG.getUNDEF(VT);
10925
10926   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
10927   // Only do this before legalize types, unless both types are integer and the
10928   // scalar type is legal. Only do this before legalize ops, since the target
10929   // maybe depending on the bitcast.
10930   // First check to see if this is all constant.
10931   // TODO: Support FP bitcasts after legalize types.
10932   if (VT.isVector() &&
10933       (!LegalTypes ||
10934        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
10935         TLI.isTypeLegal(VT.getVectorElementType()))) &&
10936       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
10937       cast<BuildVectorSDNode>(N0)->isConstant())
10938     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
10939                                              VT.getVectorElementType());
10940
10941   // If the input is a constant, let getNode fold it.
10942   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
10943     // If we can't allow illegal operations, we need to check that this is just
10944     // a fp -> int or int -> conversion and that the resulting operation will
10945     // be legal.
10946     if (!LegalOperations ||
10947         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
10948          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
10949         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
10950          TLI.isOperationLegal(ISD::Constant, VT))) {
10951       SDValue C = DAG.getBitcast(VT, N0);
10952       if (C.getNode() != N)
10953         return C;
10954     }
10955   }
10956
10957   // (conv (conv x, t1), t2) -> (conv x, t2)
10958   if (N0.getOpcode() == ISD::BITCAST)
10959     return DAG.getBitcast(VT, N0.getOperand(0));
10960
10961   // fold (conv (load x)) -> (load (conv*)x)
10962   // If the resultant load doesn't need a higher alignment than the original!
10963   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
10964       // Do not remove the cast if the types differ in endian layout.
10965       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
10966           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
10967       // If the load is volatile, we only want to change the load type if the
10968       // resulting load is legal. Otherwise we might increase the number of
10969       // memory accesses. We don't care if the original type was legal or not
10970       // as we assume software couldn't rely on the number of accesses of an
10971       // illegal type.
10972       ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
10973        TLI.isOperationLegal(ISD::LOAD, VT))) {
10974     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10975
10976     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
10977                                     *LN0->getMemOperand())) {
10978       SDValue Load =
10979           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
10980                       LN0->getPointerInfo(), LN0->getAlignment(),
10981                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10982       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10983       return Load;
10984     }
10985   }
10986
10987   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
10988     return V;
10989
10990   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
10991   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
10992   //
10993   // For ppc_fp128:
10994   // fold (bitcast (fneg x)) ->
10995   //     flipbit = signbit
10996   //     (xor (bitcast x) (build_pair flipbit, flipbit))
10997   //
10998   // fold (bitcast (fabs x)) ->
10999   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
11000   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11001   // This often reduces constant pool loads.
11002   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11003        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11004       N0.getNode()->hasOneUse() && VT.isInteger() &&
11005       !VT.isVector() && !N0.getValueType().isVector()) {
11006     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11007     AddToWorklist(NewConv.getNode());
11008
11009     SDLoc DL(N);
11010     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11011       assert(VT.getSizeInBits() == 128);
11012       SDValue SignBit = DAG.getConstant(
11013           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11014       SDValue FlipBit;
11015       if (N0.getOpcode() == ISD::FNEG) {
11016         FlipBit = SignBit;
11017         AddToWorklist(FlipBit.getNode());
11018       } else {
11019         assert(N0.getOpcode() == ISD::FABS);
11020         SDValue Hi =
11021             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11022                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11023                                               SDLoc(NewConv)));
11024         AddToWorklist(Hi.getNode());
11025         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11026         AddToWorklist(FlipBit.getNode());
11027       }
11028       SDValue FlipBits =
11029           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11030       AddToWorklist(FlipBits.getNode());
11031       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11032     }
11033     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11034     if (N0.getOpcode() == ISD::FNEG)
11035       return DAG.getNode(ISD::XOR, DL, VT,
11036                          NewConv, DAG.getConstant(SignBit, DL, VT));
11037     assert(N0.getOpcode() == ISD::FABS);
11038     return DAG.getNode(ISD::AND, DL, VT,
11039                        NewConv, DAG.getConstant(~SignBit, DL, VT));
11040   }
11041
11042   // fold (bitconvert (fcopysign cst, x)) ->
11043   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
11044   // Note that we don't handle (copysign x, cst) because this can always be
11045   // folded to an fneg or fabs.
11046   //
11047   // For ppc_fp128:
11048   // fold (bitcast (fcopysign cst, x)) ->
11049   //     flipbit = (and (extract_element
11050   //                     (xor (bitcast cst), (bitcast x)), 0),
11051   //                    signbit)
11052   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
11053   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11054       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11055       VT.isInteger() && !VT.isVector()) {
11056     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11057     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11058     if (isTypeLegal(IntXVT)) {
11059       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11060       AddToWorklist(X.getNode());
11061
11062       // If X has a different width than the result/lhs, sext it or truncate it.
11063       unsigned VTWidth = VT.getSizeInBits();
11064       if (OrigXWidth < VTWidth) {
11065         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11066         AddToWorklist(X.getNode());
11067       } else if (OrigXWidth > VTWidth) {
11068         // To get the sign bit in the right place, we have to shift it right
11069         // before truncating.
11070         SDLoc DL(X);
11071         X = DAG.getNode(ISD::SRL, DL,
11072                         X.getValueType(), X,
11073                         DAG.getConstant(OrigXWidth-VTWidth, DL,
11074                                         X.getValueType()));
11075         AddToWorklist(X.getNode());
11076         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11077         AddToWorklist(X.getNode());
11078       }
11079
11080       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11081         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11082         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11083         AddToWorklist(Cst.getNode());
11084         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11085         AddToWorklist(X.getNode());
11086         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11087         AddToWorklist(XorResult.getNode());
11088         SDValue XorResult64 = DAG.getNode(
11089             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11090             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11091                                   SDLoc(XorResult)));
11092         AddToWorklist(XorResult64.getNode());
11093         SDValue FlipBit =
11094             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11095                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11096         AddToWorklist(FlipBit.getNode());
11097         SDValue FlipBits =
11098             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11099         AddToWorklist(FlipBits.getNode());
11100         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11101       }
11102       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11103       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11104                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
11105       AddToWorklist(X.getNode());
11106
11107       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11108       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11109                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11110       AddToWorklist(Cst.getNode());
11111
11112       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11113     }
11114   }
11115
11116   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11117   if (N0.getOpcode() == ISD::BUILD_PAIR)
11118     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11119       return CombineLD;
11120
11121   // Remove double bitcasts from shuffles - this is often a legacy of
11122   // XformToShuffleWithZero being used to combine bitmaskings (of
11123   // float vectors bitcast to integer vectors) into shuffles.
11124   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
11125   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11126       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11127       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11128       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11129     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11130
11131     // If operands are a bitcast, peek through if it casts the original VT.
11132     // If operands are a constant, just bitcast back to original VT.
11133     auto PeekThroughBitcast = [&](SDValue Op) {
11134       if (Op.getOpcode() == ISD::BITCAST &&
11135           Op.getOperand(0).getValueType() == VT)
11136         return SDValue(Op.getOperand(0));
11137       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11138           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11139         return DAG.getBitcast(VT, Op);
11140       return SDValue();
11141     };
11142
11143     // FIXME: If either input vector is bitcast, try to convert the shuffle to
11144     // the result type of this bitcast. This would eliminate at least one
11145     // bitcast. See the transform in InstCombine.
11146     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11147     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
11148     if (!(SV0 && SV1))
11149       return SDValue();
11150
11151     int MaskScale =
11152         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11153     SmallVector<int, 8> NewMask;
11154     for (int M : SVN->getMask())
11155       for (int i = 0; i != MaskScale; ++i)
11156         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11157
11158     SDValue LegalShuffle =
11159         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11160     if (LegalShuffle)
11161       return LegalShuffle;
11162   }
11163
11164   return SDValue();
11165 }
11166
11167 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11168   EVT VT = N->getValueType(0);
11169   return CombineConsecutiveLoads(N, VT);
11170 }
11171
11172 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11173 /// operands. DstEltVT indicates the destination element value type.
11174 SDValue DAGCombiner::
11175 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11176   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11177
11178   // If this is already the right type, we're done.
11179   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11180
11181   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11182   unsigned DstBitSize = DstEltVT.getSizeInBits();
11183
11184   // If this is a conversion of N elements of one type to N elements of another
11185   // type, convert each element.  This handles FP<->INT cases.
11186   if (SrcBitSize == DstBitSize) {
11187     SmallVector<SDValue, 8> Ops;
11188     for (SDValue Op : BV->op_values()) {
11189       // If the vector element type is not legal, the BUILD_VECTOR operands
11190       // are promoted and implicitly truncated.  Make that explicit here.
11191       if (Op.getValueType() != SrcEltVT)
11192         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11193       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11194       AddToWorklist(Ops.back().getNode());
11195     }
11196     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11197                               BV->getValueType(0).getVectorNumElements());
11198     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11199   }
11200
11201   // Otherwise, we're growing or shrinking the elements.  To avoid having to
11202   // handle annoying details of growing/shrinking FP values, we convert them to
11203   // int first.
11204   if (SrcEltVT.isFloatingPoint()) {
11205     // Convert the input float vector to a int vector where the elements are the
11206     // same sizes.
11207     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11208     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11209     SrcEltVT = IntVT;
11210   }
11211
11212   // Now we know the input is an integer vector.  If the output is a FP type,
11213   // convert to integer first, then to FP of the right size.
11214   if (DstEltVT.isFloatingPoint()) {
11215     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11216     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11217
11218     // Next, convert to FP elements of the same size.
11219     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11220   }
11221
11222   SDLoc DL(BV);
11223
11224   // Okay, we know the src/dst types are both integers of differing types.
11225   // Handling growing first.
11226   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11227   if (SrcBitSize < DstBitSize) {
11228     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11229
11230     SmallVector<SDValue, 8> Ops;
11231     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11232          i += NumInputsPerOutput) {
11233       bool isLE = DAG.getDataLayout().isLittleEndian();
11234       APInt NewBits = APInt(DstBitSize, 0);
11235       bool EltIsUndef = true;
11236       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11237         // Shift the previously computed bits over.
11238         NewBits <<= SrcBitSize;
11239         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11240         if (Op.isUndef()) continue;
11241         EltIsUndef = false;
11242
11243         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11244                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
11245       }
11246
11247       if (EltIsUndef)
11248         Ops.push_back(DAG.getUNDEF(DstEltVT));
11249       else
11250         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11251     }
11252
11253     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11254     return DAG.getBuildVector(VT, DL, Ops);
11255   }
11256
11257   // Finally, this must be the case where we are shrinking elements: each input
11258   // turns into multiple outputs.
11259   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11260   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11261                             NumOutputsPerInput*BV->getNumOperands());
11262   SmallVector<SDValue, 8> Ops;
11263
11264   for (const SDValue &Op : BV->op_values()) {
11265     if (Op.isUndef()) {
11266       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11267       continue;
11268     }
11269
11270     APInt OpVal = cast<ConstantSDNode>(Op)->
11271                   getAPIntValue().zextOrTrunc(SrcBitSize);
11272
11273     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11274       APInt ThisVal = OpVal.trunc(DstBitSize);
11275       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11276       OpVal.lshrInPlace(DstBitSize);
11277     }
11278
11279     // For big endian targets, swap the order of the pieces of each element.
11280     if (DAG.getDataLayout().isBigEndian())
11281       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11282   }
11283
11284   return DAG.getBuildVector(VT, DL, Ops);
11285 }
11286
11287 static bool isContractable(SDNode *N) {
11288   SDNodeFlags F = N->getFlags();
11289   return F.hasAllowContract() || F.hasAllowReassociation();
11290 }
11291
11292 /// Try to perform FMA combining on a given FADD node.
11293 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11294   SDValue N0 = N->getOperand(0);
11295   SDValue N1 = N->getOperand(1);
11296   EVT VT = N->getValueType(0);
11297   SDLoc SL(N);
11298
11299   const TargetOptions &Options = DAG.getTarget().Options;
11300
11301   // Floating-point multiply-add with intermediate rounding.
11302   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11303
11304   // Floating-point multiply-add without intermediate rounding.
11305   bool HasFMA =
11306       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11307       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11308
11309   // No valid opcode, do not combine.
11310   if (!HasFMAD && !HasFMA)
11311     return SDValue();
11312
11313   SDNodeFlags Flags = N->getFlags();
11314   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11315   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11316                               CanFuse || HasFMAD);
11317   // If the addition is not contractable, do not combine.
11318   if (!AllowFusionGlobally && !isContractable(N))
11319     return SDValue();
11320
11321   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11322   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11323     return SDValue();
11324
11325   // Always prefer FMAD to FMA for precision.
11326   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11327   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11328
11329   // Is the node an FMUL and contractable either due to global flags or
11330   // SDNodeFlags.
11331   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11332     if (N.getOpcode() != ISD::FMUL)
11333       return false;
11334     return AllowFusionGlobally || isContractable(N.getNode());
11335   };
11336   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11337   // prefer to fold the multiply with fewer uses.
11338   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
11339     if (N0.getNode()->use_size() > N1.getNode()->use_size())
11340       std::swap(N0, N1);
11341   }
11342
11343   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
11344   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11345     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11346                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
11347   }
11348
11349   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11350   // Note: Commutes FADD operands.
11351   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11352     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11353                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
11354   }
11355
11356   // Look through FP_EXTEND nodes to do more combining.
11357
11358   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
11359   if (N0.getOpcode() == ISD::FP_EXTEND) {
11360     SDValue N00 = N0.getOperand(0);
11361     if (isContractableFMUL(N00) &&
11362         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11363       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11364                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11365                                      N00.getOperand(0)),
11366                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11367                                      N00.getOperand(1)), N1, Flags);
11368     }
11369   }
11370
11371   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11372   // Note: Commutes FADD operands.
11373   if (N1.getOpcode() == ISD::FP_EXTEND) {
11374     SDValue N10 = N1.getOperand(0);
11375     if (isContractableFMUL(N10) &&
11376         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11377       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11378                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11379                                      N10.getOperand(0)),
11380                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11381                                      N10.getOperand(1)), N0, Flags);
11382     }
11383   }
11384
11385   // More folding opportunities when target permits.
11386   if (Aggressive) {
11387     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
11388     if (CanFuse &&
11389         N0.getOpcode() == PreferredFusedOpcode &&
11390         N0.getOperand(2).getOpcode() == ISD::FMUL &&
11391         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11392       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11393                          N0.getOperand(0), N0.getOperand(1),
11394                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11395                                      N0.getOperand(2).getOperand(0),
11396                                      N0.getOperand(2).getOperand(1),
11397                                      N1, Flags), Flags);
11398     }
11399
11400     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
11401     if (CanFuse &&
11402         N1->getOpcode() == PreferredFusedOpcode &&
11403         N1.getOperand(2).getOpcode() == ISD::FMUL &&
11404         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11405       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11406                          N1.getOperand(0), N1.getOperand(1),
11407                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11408                                      N1.getOperand(2).getOperand(0),
11409                                      N1.getOperand(2).getOperand(1),
11410                                      N0, Flags), Flags);
11411     }
11412
11413
11414     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11415     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
11416     auto FoldFAddFMAFPExtFMul = [&] (
11417       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11418       SDNodeFlags Flags) {
11419       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11420                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11421                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11422                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11423                                      Z, Flags), Flags);
11424     };
11425     if (N0.getOpcode() == PreferredFusedOpcode) {
11426       SDValue N02 = N0.getOperand(2);
11427       if (N02.getOpcode() == ISD::FP_EXTEND) {
11428         SDValue N020 = N02.getOperand(0);
11429         if (isContractableFMUL(N020) &&
11430             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11431           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11432                                       N020.getOperand(0), N020.getOperand(1),
11433                                       N1, Flags);
11434         }
11435       }
11436     }
11437
11438     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11439     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11440     // FIXME: This turns two single-precision and one double-precision
11441     // operation into two double-precision operations, which might not be
11442     // interesting for all targets, especially GPUs.
11443     auto FoldFAddFPExtFMAFMul = [&] (
11444       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11445       SDNodeFlags Flags) {
11446       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11447                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11448                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11449                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11450                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11451                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11452                                      Z, Flags), Flags);
11453     };
11454     if (N0.getOpcode() == ISD::FP_EXTEND) {
11455       SDValue N00 = N0.getOperand(0);
11456       if (N00.getOpcode() == PreferredFusedOpcode) {
11457         SDValue N002 = N00.getOperand(2);
11458         if (isContractableFMUL(N002) &&
11459             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11460           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11461                                       N002.getOperand(0), N002.getOperand(1),
11462                                       N1, Flags);
11463         }
11464       }
11465     }
11466
11467     // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
11468     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
11469     if (N1.getOpcode() == PreferredFusedOpcode) {
11470       SDValue N12 = N1.getOperand(2);
11471       if (N12.getOpcode() == ISD::FP_EXTEND) {
11472         SDValue N120 = N12.getOperand(0);
11473         if (isContractableFMUL(N120) &&
11474             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11475           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11476                                       N120.getOperand(0), N120.getOperand(1),
11477                                       N0, Flags);
11478         }
11479       }
11480     }
11481
11482     // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
11483     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11484     // FIXME: This turns two single-precision and one double-precision
11485     // operation into two double-precision operations, which might not be
11486     // interesting for all targets, especially GPUs.
11487     if (N1.getOpcode() == ISD::FP_EXTEND) {
11488       SDValue N10 = N1.getOperand(0);
11489       if (N10.getOpcode() == PreferredFusedOpcode) {
11490         SDValue N102 = N10.getOperand(2);
11491         if (isContractableFMUL(N102) &&
11492             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11493           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11494                                       N102.getOperand(0), N102.getOperand(1),
11495                                       N0, Flags);
11496         }
11497       }
11498     }
11499   }
11500
11501   return SDValue();
11502 }
11503
11504 /// Try to perform FMA combining on a given FSUB node.
11505 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11506   SDValue N0 = N->getOperand(0);
11507   SDValue N1 = N->getOperand(1);
11508   EVT VT = N->getValueType(0);
11509   SDLoc SL(N);
11510
11511   const TargetOptions &Options = DAG.getTarget().Options;
11512   // Floating-point multiply-add with intermediate rounding.
11513   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11514
11515   // Floating-point multiply-add without intermediate rounding.
11516   bool HasFMA =
11517       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11518       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11519
11520   // No valid opcode, do not combine.
11521   if (!HasFMAD && !HasFMA)
11522     return SDValue();
11523
11524   const SDNodeFlags Flags = N->getFlags();
11525   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11526   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11527                               CanFuse || HasFMAD);
11528
11529   // If the subtraction is not contractable, do not combine.
11530   if (!AllowFusionGlobally && !isContractable(N))
11531     return SDValue();
11532
11533   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11534   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11535     return SDValue();
11536
11537   // Always prefer FMAD to FMA for precision.
11538   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11539   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11540
11541   // Is the node an FMUL and contractable either due to global flags or
11542   // SDNodeFlags.
11543   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11544     if (N.getOpcode() != ISD::FMUL)
11545       return false;
11546     return AllowFusionGlobally || isContractable(N.getNode());
11547   };
11548
11549   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11550   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11551     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11552                        N0.getOperand(0), N0.getOperand(1),
11553                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11554   }
11555
11556   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11557   // Note: Commutes FSUB operands.
11558   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11559     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11560                        DAG.getNode(ISD::FNEG, SL, VT,
11561                                    N1.getOperand(0)),
11562                        N1.getOperand(1), N0, Flags);
11563   }
11564
11565   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
11566   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11567       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11568     SDValue N00 = N0.getOperand(0).getOperand(0);
11569     SDValue N01 = N0.getOperand(0).getOperand(1);
11570     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11571                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11572                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11573   }
11574
11575   // Look through FP_EXTEND nodes to do more combining.
11576
11577   // fold (fsub (fpext (fmul x, y)), z)
11578   //   -> (fma (fpext x), (fpext y), (fneg z))
11579   if (N0.getOpcode() == ISD::FP_EXTEND) {
11580     SDValue N00 = N0.getOperand(0);
11581     if (isContractableFMUL(N00) &&
11582         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11583       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11584                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11585                                      N00.getOperand(0)),
11586                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11587                                      N00.getOperand(1)),
11588                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11589     }
11590   }
11591
11592   // fold (fsub x, (fpext (fmul y, z)))
11593   //   -> (fma (fneg (fpext y)), (fpext z), x)
11594   // Note: Commutes FSUB operands.
11595   if (N1.getOpcode() == ISD::FP_EXTEND) {
11596     SDValue N10 = N1.getOperand(0);
11597     if (isContractableFMUL(N10) &&
11598         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11599       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11600                          DAG.getNode(ISD::FNEG, SL, VT,
11601                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
11602                                                  N10.getOperand(0))),
11603                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11604                                      N10.getOperand(1)),
11605                          N0, Flags);
11606     }
11607   }
11608
11609   // fold (fsub (fpext (fneg (fmul, x, y))), z)
11610   //   -> (fneg (fma (fpext x), (fpext y), z))
11611   // Note: This could be removed with appropriate canonicalization of the
11612   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11613   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11614   // from implementing the canonicalization in visitFSUB.
11615   if (N0.getOpcode() == ISD::FP_EXTEND) {
11616     SDValue N00 = N0.getOperand(0);
11617     if (N00.getOpcode() == ISD::FNEG) {
11618       SDValue N000 = N00.getOperand(0);
11619       if (isContractableFMUL(N000) &&
11620           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11621         return DAG.getNode(ISD::FNEG, SL, VT,
11622                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11623                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11624                                                    N000.getOperand(0)),
11625                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11626                                                    N000.getOperand(1)),
11627                                        N1, Flags));
11628       }
11629     }
11630   }
11631
  // fold (fsub (fneg (fpext (fmul, x, y))), z)
  //   -> (fneg (fma (fpext x), (fpext y), z))
  // Note: This could be removed with appropriate canonicalization of the
  // input expression into (fneg (fadd (fpext (fmul, x, y)), z)). However, the
  // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent us
  // from implementing the canonicalization in visitFSUB.
11638   if (N0.getOpcode() == ISD::FNEG) {
11639     SDValue N00 = N0.getOperand(0);
11640     if (N00.getOpcode() == ISD::FP_EXTEND) {
11641       SDValue N000 = N00.getOperand(0);
11642       if (isContractableFMUL(N000) &&
11643           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11644         return DAG.getNode(ISD::FNEG, SL, VT,
11645                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11646                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11647                                                    N000.getOperand(0)),
11648                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11649                                                    N000.getOperand(1)),
11650                                        N1, Flags));
11651       }
11652     }
11653   }
11654
11655   // More folding opportunities when target permits.
11656   if (Aggressive) {
11657     // fold (fsub (fma x, y, (fmul u, v)), z)
11658     //   -> (fma x, y (fma u, v, (fneg z)))
11659     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11660         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11661         N0.getOperand(2)->hasOneUse()) {
11662       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11663                          N0.getOperand(0), N0.getOperand(1),
11664                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11665                                      N0.getOperand(2).getOperand(0),
11666                                      N0.getOperand(2).getOperand(1),
11667                                      DAG.getNode(ISD::FNEG, SL, VT,
11668                                                  N1), Flags), Flags);
11669     }
11670
11671     // fold (fsub x, (fma y, z, (fmul u, v)))
11672     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
11673     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11674         isContractableFMUL(N1.getOperand(2))) {
11675       SDValue N20 = N1.getOperand(2).getOperand(0);
11676       SDValue N21 = N1.getOperand(2).getOperand(1);
11677       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11678                          DAG.getNode(ISD::FNEG, SL, VT,
11679                                      N1.getOperand(0)),
11680                          N1.getOperand(1),
11681                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11682                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11683                                      N21, N0, Flags), Flags);
11684     }
11685
11686
11687     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11688     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
11689     if (N0.getOpcode() == PreferredFusedOpcode) {
11690       SDValue N02 = N0.getOperand(2);
11691       if (N02.getOpcode() == ISD::FP_EXTEND) {
11692         SDValue N020 = N02.getOperand(0);
11693         if (isContractableFMUL(N020) &&
11694             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11695           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11696                              N0.getOperand(0), N0.getOperand(1),
11697                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11698                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11699                                                      N020.getOperand(0)),
11700                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11701                                                      N020.getOperand(1)),
11702                                          DAG.getNode(ISD::FNEG, SL, VT,
11703                                                      N1), Flags), Flags);
11704         }
11705       }
11706     }
11707
11708     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11709     //   -> (fma (fpext x), (fpext y),
11710     //           (fma (fpext u), (fpext v), (fneg z)))
11711     // FIXME: This turns two single-precision and one double-precision
11712     // operation into two double-precision operations, which might not be
11713     // interesting for all targets, especially GPUs.
11714     if (N0.getOpcode() == ISD::FP_EXTEND) {
11715       SDValue N00 = N0.getOperand(0);
11716       if (N00.getOpcode() == PreferredFusedOpcode) {
11717         SDValue N002 = N00.getOperand(2);
11718         if (isContractableFMUL(N002) &&
11719             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11720           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11721                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11722                                          N00.getOperand(0)),
11723                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11724                                          N00.getOperand(1)),
11725                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11726                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11727                                                      N002.getOperand(0)),
11728                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11729                                                      N002.getOperand(1)),
11730                                          DAG.getNode(ISD::FNEG, SL, VT,
11731                                                      N1), Flags), Flags);
11732         }
11733       }
11734     }
11735
11736     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11737     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11738     if (N1.getOpcode() == PreferredFusedOpcode &&
11739         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11740       SDValue N120 = N1.getOperand(2).getOperand(0);
11741       if (isContractableFMUL(N120) &&
11742           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11743         SDValue N1200 = N120.getOperand(0);
11744         SDValue N1201 = N120.getOperand(1);
11745         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11746                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11747                            N1.getOperand(1),
11748                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11749                                        DAG.getNode(ISD::FNEG, SL, VT,
11750                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11751                                                                VT, N1200)),
11752                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11753                                                    N1201),
11754                                        N0, Flags), Flags);
11755       }
11756     }
11757
11758     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11759     //   -> (fma (fneg (fpext y)), (fpext z),
11760     //           (fma (fneg (fpext u)), (fpext v), x))
11761     // FIXME: This turns two single-precision and one double-precision
11762     // operation into two double-precision operations, which might not be
11763     // interesting for all targets, especially GPUs.
11764     if (N1.getOpcode() == ISD::FP_EXTEND &&
11765         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11766       SDValue CvtSrc = N1.getOperand(0);
11767       SDValue N100 = CvtSrc.getOperand(0);
11768       SDValue N101 = CvtSrc.getOperand(1);
11769       SDValue N102 = CvtSrc.getOperand(2);
11770       if (isContractableFMUL(N102) &&
11771           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11772         SDValue N1020 = N102.getOperand(0);
11773         SDValue N1021 = N102.getOperand(1);
11774         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11775                            DAG.getNode(ISD::FNEG, SL, VT,
11776                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11777                                                    N100)),
11778                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11779                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11780                                        DAG.getNode(ISD::FNEG, SL, VT,
11781                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11782                                                                VT, N1020)),
11783                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11784                                                    N1021),
11785                                        N0, Flags), Flags);
11786       }
11787     }
11788   }
11789
11790   return SDValue();
11791 }
11792
11793 /// Try to perform FMA combining on a given FMUL node based on the distributive
11794 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11795 /// subtraction instead of addition).
11796 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11797   SDValue N0 = N->getOperand(0);
11798   SDValue N1 = N->getOperand(1);
11799   EVT VT = N->getValueType(0);
11800   SDLoc SL(N);
11801   const SDNodeFlags Flags = N->getFlags();
11802
11803   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11804
11805   const TargetOptions &Options = DAG.getTarget().Options;
11806
11807   // The transforms below are incorrect when x == 0 and y == inf, because the
11808   // intermediate multiplication produces a nan.
11809   if (!Options.NoInfsFPMath)
11810     return SDValue();
11811
11812   // Floating-point multiply-add without intermediate rounding.
11813   bool HasFMA =
11814       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11815       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11816       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11817
11818   // Floating-point multiply-add with intermediate rounding. This can result
11819   // in a less precise result due to the changed rounding order.
11820   bool HasFMAD = Options.UnsafeFPMath &&
11821                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11822
11823   // No valid opcode, do not combine.
11824   if (!HasFMAD && !HasFMA)
11825     return SDValue();
11826
11827   // Always prefer FMAD to FMA for precision.
11828   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11829   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11830
11831   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11832   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11833   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11834     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11835       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11836         if (C->isExactlyValue(+1.0))
11837           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11838                              Y, Flags);
11839         if (C->isExactlyValue(-1.0))
11840           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11841                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11842       }
11843     }
11844     return SDValue();
11845   };
11846
11847   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11848     return FMA;
11849   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11850     return FMA;
11851
11852   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11853   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11854   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11855   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11856   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11857     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11858       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11859         if (C0->isExactlyValue(+1.0))
11860           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11861                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11862                              Y, Flags);
11863         if (C0->isExactlyValue(-1.0))
11864           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11865                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11866                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11867       }
11868       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11869         if (C1->isExactlyValue(+1.0))
11870           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11871                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11872         if (C1->isExactlyValue(-1.0))
11873           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11874                              Y, Flags);
11875       }
11876     }
11877     return SDValue();
11878   };
11879
11880   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11881     return FMA;
11882   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11883     return FMA;
11884
11885   return SDValue();
11886 }
11887
11888 SDValue DAGCombiner::visitFADD(SDNode *N) {
11889   SDValue N0 = N->getOperand(0);
11890   SDValue N1 = N->getOperand(1);
11891   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11892   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11893   EVT VT = N->getValueType(0);
11894   SDLoc DL(N);
11895   const TargetOptions &Options = DAG.getTarget().Options;
11896   const SDNodeFlags Flags = N->getFlags();
11897
11898   // fold vector ops
11899   if (VT.isVector())
11900     if (SDValue FoldedVOp = SimplifyVBinOp(N))
11901       return FoldedVOp;
11902
11903   // fold (fadd c1, c2) -> c1 + c2
11904   if (N0CFP && N1CFP)
11905     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
11906
11907   // canonicalize constant to RHS
11908   if (N0CFP && !N1CFP)
11909     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
11910
11911   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
11912   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
11913   if (N1C && N1C->isZero())
11914     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
11915       return N0;
11916
11917   if (SDValue NewSel = foldBinOpIntoSelect(N))
11918     return NewSel;
11919
11920   // fold (fadd A, (fneg B)) -> (fsub A, B)
11921   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11922       TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
11923     return DAG.getNode(
11924         ISD::FSUB, DL, VT, N0,
11925         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
11926
11927   // fold (fadd (fneg A), B) -> (fsub B, A)
11928   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
11929       TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
11930     return DAG.getNode(
11931         ISD::FSUB, DL, VT, N1,
11932         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
11933
11934   auto isFMulNegTwo = [](SDValue FMul) {
11935     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
11936       return false;
11937     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
11938     return C && C->isExactlyValue(-2.0);
11939   };
11940
11941   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
11942   if (isFMulNegTwo(N0)) {
11943     SDValue B = N0.getOperand(0);
11944     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11945     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
11946   }
11947   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
11948   if (isFMulNegTwo(N1)) {
11949     SDValue B = N1.getOperand(0);
11950     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
11951     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
11952   }
11953
11954   // No FP constant should be created after legalization as Instruction
11955   // Selection pass has a hard time dealing with FP constants.
11956   bool AllowNewConst = (Level < AfterLegalizeDAG);
11957
11958   // If nnan is enabled, fold lots of things.
11959   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
11960     // If allowed, fold (fadd (fneg x), x) -> 0.0
11961     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
11962       return DAG.getConstantFP(0.0, DL, VT);
11963
11964     // If allowed, fold (fadd x, (fneg x)) -> 0.0
11965     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
11966       return DAG.getConstantFP(0.0, DL, VT);
11967   }
11968
11969   // If 'unsafe math' or reassoc and nsz, fold lots of things.
11970   // TODO: break out portions of the transformations below for which Unsafe is
11971   //       considered and which do not require both nsz and reassoc
11972   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
11973        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
11974       AllowNewConst) {
11975     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
11976     if (N1CFP && N0.getOpcode() == ISD::FADD &&
11977         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
11978       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
11979       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
11980     }
11981
11982     // We can fold chains of FADD's of the same value into multiplications.
11983     // This transform is not safe in general because we are reducing the number
11984     // of rounding steps.
11985     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
11986       if (N0.getOpcode() == ISD::FMUL) {
11987         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
11988         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
11989
11990         // (fadd (fmul x, c), x) -> (fmul x, c+1)
11991         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
11992           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
11993                                        DAG.getConstantFP(1.0, DL, VT), Flags);
11994           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
11995         }
11996
11997         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
11998         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
11999             N1.getOperand(0) == N1.getOperand(1) &&
12000             N0.getOperand(0) == N1.getOperand(0)) {
12001           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12002                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12003           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12004         }
12005       }
12006
12007       if (N1.getOpcode() == ISD::FMUL) {
12008         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12009         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12010
12011         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12012         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12013           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12014                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12015           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12016         }
12017
12018         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12019         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12020             N0.getOperand(0) == N0.getOperand(1) &&
12021             N1.getOperand(0) == N0.getOperand(0)) {
12022           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12023                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12024           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12025         }
12026       }
12027
12028       if (N0.getOpcode() == ISD::FADD) {
12029         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12030         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12031         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12032             (N0.getOperand(0) == N1)) {
12033           return DAG.getNode(ISD::FMUL, DL, VT,
12034                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12035         }
12036       }
12037
12038       if (N1.getOpcode() == ISD::FADD) {
12039         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12040         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12041         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12042             N1.getOperand(0) == N0) {
12043           return DAG.getNode(ISD::FMUL, DL, VT,
12044                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12045         }
12046       }
12047
12048       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12049       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12050           N0.getOperand(0) == N0.getOperand(1) &&
12051           N1.getOperand(0) == N1.getOperand(1) &&
12052           N0.getOperand(0) == N1.getOperand(0)) {
12053         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12054                            DAG.getConstantFP(4.0, DL, VT), Flags);
12055       }
12056     }
12057   } // enable-unsafe-fp-math
12058
12059   // FADD -> FMA combines:
12060   if (SDValue Fused = visitFADDForFMACombine(N)) {
12061     AddToWorklist(Fused.getNode());
12062     return Fused;
12063   }
12064   return SDValue();
12065 }
12066
12067 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12068   SDValue N0 = N->getOperand(0);
12069   SDValue N1 = N->getOperand(1);
12070   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12071   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12072   EVT VT = N->getValueType(0);
12073   SDLoc DL(N);
12074   const TargetOptions &Options = DAG.getTarget().Options;
12075   const SDNodeFlags Flags = N->getFlags();
12076
12077   // fold vector ops
12078   if (VT.isVector())
12079     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12080       return FoldedVOp;
12081
12082   // fold (fsub c1, c2) -> c1-c2
12083   if (N0CFP && N1CFP)
12084     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12085
12086   if (SDValue NewSel = foldBinOpIntoSelect(N))
12087     return NewSel;
12088
12089   // (fsub A, 0) -> A
12090   if (N1CFP && N1CFP->isZero()) {
12091     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12092         Flags.hasNoSignedZeros()) {
12093       return N0;
12094     }
12095   }
12096
12097   if (N0 == N1) {
12098     // (fsub x, x) -> 0.0
12099     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12100       return DAG.getConstantFP(0.0f, DL, VT);
12101   }
12102
12103   // (fsub -0.0, N1) -> -N1
12104   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12105   //       FSUB does not specify the sign bit of a NaN. Also note that for
12106   //       the same reason, the inverse transform is not safe, unless fast math
12107   //       flags are in play.
12108   if (N0CFP && N0CFP->isZero()) {
12109     if (N0CFP->isNegative() ||
12110         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12111       if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
12112         return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12113       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12114         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12115     }
12116   }
12117
12118   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12119        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12120       N1.getOpcode() == ISD::FADD) {
12121     // X - (X + Y) -> -Y
12122     if (N0 == N1->getOperand(0))
12123       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12124     // X - (Y + X) -> -Y
12125     if (N0 == N1->getOperand(1))
12126       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12127   }
12128
12129   // fold (fsub A, (fneg B)) -> (fadd A, B)
12130   if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
12131     return DAG.getNode(
12132         ISD::FADD, DL, VT, N0,
12133         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12134
12135   // FSUB -> FMA combines:
12136   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12137     AddToWorklist(Fused.getNode());
12138     return Fused;
12139   }
12140
12141   return SDValue();
12142 }
12143
12144 /// Return true if both inputs are at least as cheap in negated form and at
12145 /// least one input is strictly cheaper in negated form.
12146 bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
12147   if (char LHSNeg =
12148           TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
12149     if (char RHSNeg =
12150             TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
12151       // Both negated operands are at least as cheap as their counterparts.
12152       // Check to see if at least one is cheaper negated.
12153       if (LHSNeg == 2 || RHSNeg == 2)
12154         return true;
12155
12156   return false;
12157 }
12158
12159 SDValue DAGCombiner::visitFMUL(SDNode *N) {
12160   SDValue N0 = N->getOperand(0);
12161   SDValue N1 = N->getOperand(1);
12162   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12163   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12164   EVT VT = N->getValueType(0);
12165   SDLoc DL(N);
12166   const TargetOptions &Options = DAG.getTarget().Options;
12167   const SDNodeFlags Flags = N->getFlags();
12168
12169   // fold vector ops
12170   if (VT.isVector()) {
12171     // This just handles C1 * C2 for vectors. Other vector folds are below.
12172     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12173       return FoldedVOp;
12174   }
12175
12176   // fold (fmul c1, c2) -> c1*c2
12177   if (N0CFP && N1CFP)
12178     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12179
12180   // canonicalize constant to RHS
12181   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12182      !isConstantFPBuildVectorOrConstantFP(N1))
12183     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12184
12185   if (SDValue NewSel = foldBinOpIntoSelect(N))
12186     return NewSel;
12187
12188   if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12189       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12190     // fold (fmul A, 0) -> 0
12191     if (N1CFP && N1CFP->isZero())
12192       return N1;
12193   }
12194
12195   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12196     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12197     if (isConstantFPBuildVectorOrConstantFP(N1) &&
12198         N0.getOpcode() == ISD::FMUL) {
12199       SDValue N00 = N0.getOperand(0);
12200       SDValue N01 = N0.getOperand(1);
12201       // Avoid an infinite loop by making sure that N00 is not a constant
12202       // (the inner multiply has not been constant folded yet).
12203       if (isConstantFPBuildVectorOrConstantFP(N01) &&
12204           !isConstantFPBuildVectorOrConstantFP(N00)) {
12205         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12206         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12207       }
12208     }
12209
12210     // Match a special-case: we convert X * 2.0 into fadd.
12211     // fmul (fadd X, X), C -> fmul X, 2.0 * C
12212     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12213         N0.getOperand(0) == N0.getOperand(1)) {
12214       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12215       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12216       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12217     }
12218   }
12219
12220   // fold (fmul X, 2.0) -> (fadd X, X)
12221   if (N1CFP && N1CFP->isExactlyValue(+2.0))
12222     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12223
12224   // fold (fmul X, -1.0) -> (fneg X)
12225   if (N1CFP && N1CFP->isExactlyValue(-1.0))
12226     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12227       return DAG.getNode(ISD::FNEG, DL, VT, N0);
12228
12229   // -N0 * -N1 --> N0 * N1
12230   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12231     SDValue NegN0 =
12232         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12233     SDValue NegN1 =
12234         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12235     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12236   }
12237
12238   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12239   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12240   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12241       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12242       TLI.isOperationLegal(ISD::FABS, VT)) {
12243     SDValue Select = N0, X = N1;
12244     if (Select.getOpcode() != ISD::SELECT)
12245       std::swap(Select, X);
12246
12247     SDValue Cond = Select.getOperand(0);
12248     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12249     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12250
12251     if (TrueOpnd && FalseOpnd &&
12252         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12253         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12254         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12255       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12256       switch (CC) {
12257       default: break;
12258       case ISD::SETOLT:
12259       case ISD::SETULT:
12260       case ISD::SETOLE:
12261       case ISD::SETULE:
12262       case ISD::SETLT:
12263       case ISD::SETLE:
12264         std::swap(TrueOpnd, FalseOpnd);
12265         LLVM_FALLTHROUGH;
12266       case ISD::SETOGT:
12267       case ISD::SETUGT:
12268       case ISD::SETOGE:
12269       case ISD::SETUGE:
12270       case ISD::SETGT:
12271       case ISD::SETGE:
12272         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12273             TLI.isOperationLegal(ISD::FNEG, VT))
12274           return DAG.getNode(ISD::FNEG, DL, VT,
12275                    DAG.getNode(ISD::FABS, DL, VT, X));
12276         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12277           return DAG.getNode(ISD::FABS, DL, VT, X);
12278
12279         break;
12280       }
12281     }
12282   }
12283
12284   // FMUL -> FMA combines:
12285   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12286     AddToWorklist(Fused.getNode());
12287     return Fused;
12288   }
12289
12290   return SDValue();
12291 }
12292
12293 SDValue DAGCombiner::visitFMA(SDNode *N) {
12294   SDValue N0 = N->getOperand(0);
12295   SDValue N1 = N->getOperand(1);
12296   SDValue N2 = N->getOperand(2);
12297   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12298   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12299   EVT VT = N->getValueType(0);
12300   SDLoc DL(N);
12301   const TargetOptions &Options = DAG.getTarget().Options;
12302
12303   // FMA nodes have flags that propagate to the created nodes.
12304   const SDNodeFlags Flags = N->getFlags();
12305   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12306
12307   // Constant fold FMA.
12308   if (isa<ConstantFPSDNode>(N0) &&
12309       isa<ConstantFPSDNode>(N1) &&
12310       isa<ConstantFPSDNode>(N2)) {
12311     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12312   }
12313
12314   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
12315   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12316     SDValue NegN0 =
12317         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12318     SDValue NegN1 =
12319         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12320     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
12321   }
12322
12323   if (UnsafeFPMath) {
12324     if (N0CFP && N0CFP->isZero())
12325       return N2;
12326     if (N1CFP && N1CFP->isZero())
12327       return N2;
12328   }
12329   // TODO: The FMA node should have flags that propagate to these nodes.
12330   if (N0CFP && N0CFP->isExactlyValue(1.0))
12331     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12332   if (N1CFP && N1CFP->isExactlyValue(1.0))
12333     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12334
12335   // Canonicalize (fma c, x, y) -> (fma x, c, y)
12336   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12337      !isConstantFPBuildVectorOrConstantFP(N1))
12338     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12339
12340   if (UnsafeFPMath) {
12341     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12342     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
12343         isConstantFPBuildVectorOrConstantFP(N1) &&
12344         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
12345       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12346                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12347                                      Flags), Flags);
12348     }
12349
12350     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12351     if (N0.getOpcode() == ISD::FMUL &&
12352         isConstantFPBuildVectorOrConstantFP(N1) &&
12353         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12354       return DAG.getNode(ISD::FMA, DL, VT,
12355                          N0.getOperand(0),
12356                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12357                                      Flags),
12358                          N2);
12359     }
12360   }
12361
12362   // (fma x, 1, y) -> (fadd x, y)
12363   // (fma x, -1, y) -> (fadd (fneg x), y)
12364   if (N1CFP) {
12365     if (N1CFP->isExactlyValue(1.0))
12366       // TODO: The FMA node should have flags that propagate to this node.
12367       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12368
12369     if (N1CFP->isExactlyValue(-1.0) &&
12370         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12371       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12372       AddToWorklist(RHSNeg.getNode());
12373       // TODO: The FMA node should have flags that propagate to this node.
12374       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12375     }
12376
12377     // fma (fneg x), K, y -> fma x -K, y
12378     if (N0.getOpcode() == ISD::FNEG &&
12379         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12380          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12381                                               ForCodeSize)))) {
12382       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12383                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12384     }
12385   }
12386
12387   if (UnsafeFPMath) {
12388     // (fma x, c, x) -> (fmul x, (c+1))
12389     if (N1CFP && N0 == N2) {
12390       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12391                          DAG.getNode(ISD::FADD, DL, VT, N1,
12392                                      DAG.getConstantFP(1.0, DL, VT), Flags),
12393                          Flags);
12394     }
12395
12396     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12397     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12398       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12399                          DAG.getNode(ISD::FADD, DL, VT, N1,
12400                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
12401                          Flags);
12402     }
12403   }
12404
12405   return SDValue();
12406 }
12407
12408 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12409 // reciprocal.
12410 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12411 // Notice that this is not always beneficial. One reason is different targets
12412 // may have different costs for FDIV and FMUL, so sometimes the cost of two
12413 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
12414 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
12415 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12416   // TODO: Limit this transform based on optsize/minsize - it always creates at
12417   //       least 1 extra instruction. But the perf win may be substantial enough
12418   //       that only minsize should restrict this.
12419   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12420   const SDNodeFlags Flags = N->getFlags();
12421   if (!UnsafeMath && !Flags.hasAllowReciprocal())
12422     return SDValue();
12423
12424   // Skip if current node is a reciprocal/fneg-reciprocal.
12425   SDValue N0 = N->getOperand(0);
12426   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12427   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12428     return SDValue();
12429
12430   // Exit early if the target does not want this transform or if there can't
12431   // possibly be enough uses of the divisor to make the transform worthwhile.
12432   SDValue N1 = N->getOperand(1);
12433   unsigned MinUses = TLI.combineRepeatedFPDivisors();
12434
12435   // For splat vectors, scale the number of uses by the splat factor. If we can
12436   // convert the division into a scalar op, that will likely be much faster.
12437   unsigned NumElts = 1;
12438   EVT VT = N->getValueType(0);
12439   if (VT.isVector() && DAG.isSplatValue(N1))
12440     NumElts = VT.getVectorNumElements();
12441
12442   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12443     return SDValue();
12444
12445   // Find all FDIV users of the same divisor.
12446   // Use a set because duplicates may be present in the user list.
12447   SetVector<SDNode *> Users;
12448   for (auto *U : N1->uses()) {
12449     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12450       // This division is eligible for optimization only if global unsafe math
12451       // is enabled or if this division allows reciprocal formation.
12452       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12453         Users.insert(U);
12454     }
12455   }
12456
12457   // Now that we have the actual number of divisor uses, make sure it meets
12458   // the minimum threshold specified by the target.
12459   if ((Users.size() * NumElts) < MinUses)
12460     return SDValue();
12461
12462   SDLoc DL(N);
12463   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12464   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12465
12466   // Dividend / Divisor -> Dividend * Reciprocal
12467   for (auto *U : Users) {
12468     SDValue Dividend = U->getOperand(0);
12469     if (Dividend != FPOne) {
12470       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12471                                     Reciprocal, Flags);
12472       CombineTo(U, NewNode);
12473     } else if (U != Reciprocal.getNode()) {
12474       // In the absence of fast-math-flags, this user node is always the
12475       // same node as Reciprocal, but with FMF they may be different nodes.
12476       CombineTo(U, Reciprocal);
12477     }
12478   }
12479   return SDValue(N, 0);  // N was replaced.
12480 }
12481
12482 SDValue DAGCombiner::visitFDIV(SDNode *N) {
12483   SDValue N0 = N->getOperand(0);
12484   SDValue N1 = N->getOperand(1);
12485   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12486   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12487   EVT VT = N->getValueType(0);
12488   SDLoc DL(N);
12489   const TargetOptions &Options = DAG.getTarget().Options;
12490   SDNodeFlags Flags = N->getFlags();
12491
12492   // fold vector ops
12493   if (VT.isVector())
12494     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12495       return FoldedVOp;
12496
12497   // fold (fdiv c1, c2) -> c1/c2
12498   if (N0CFP && N1CFP)
12499     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12500
12501   if (SDValue NewSel = foldBinOpIntoSelect(N))
12502     return NewSel;
12503
12504   if (SDValue V = combineRepeatedFPDivisors(N))
12505     return V;
12506
12507   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12508     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12509     if (N1CFP) {
12510       // Compute the reciprocal 1.0 / c2.
12511       const APFloat &N1APF = N1CFP->getValueAPF();
12512       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12513       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12514       // Only do the transform if the reciprocal is a legal fp immediate that
12515       // isn't too nasty (eg NaN, denormal, ...).
12516       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12517           (!LegalOperations ||
12518            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12519            // backend)... we should handle this gracefully after Legalize.
12520            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12521            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12522            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12523         return DAG.getNode(ISD::FMUL, DL, VT, N0,
12524                            DAG.getConstantFP(Recip, DL, VT), Flags);
12525     }
12526
12527     // If this FDIV is part of a reciprocal square root, it may be folded
12528     // into a target-specific square root estimate instruction.
12529     if (N1.getOpcode() == ISD::FSQRT) {
12530       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
12531         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12532     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12533                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12534       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12535                                           Flags)) {
12536         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12537         AddToWorklist(RV.getNode());
12538         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12539       }
12540     } else if (N1.getOpcode() == ISD::FP_ROUND &&
12541                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12542       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12543                                           Flags)) {
12544         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12545         AddToWorklist(RV.getNode());
12546         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12547       }
12548     } else if (N1.getOpcode() == ISD::FMUL) {
12549       // Look through an FMUL. Even though this won't remove the FDIV directly,
12550       // it's still worthwhile to get rid of the FSQRT if possible.
12551       SDValue SqrtOp;
12552       SDValue OtherOp;
12553       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12554         SqrtOp = N1.getOperand(0);
12555         OtherOp = N1.getOperand(1);
12556       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12557         SqrtOp = N1.getOperand(1);
12558         OtherOp = N1.getOperand(0);
12559       }
12560       if (SqrtOp.getNode()) {
12561         // We found a FSQRT, so try to make this fold:
12562         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12563         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12564           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12565           AddToWorklist(RV.getNode());
12566           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12567         }
12568       }
12569     }
12570
12571     // Fold into a reciprocal estimate and multiply instead of a real divide.
12572     if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
12573       return RV;
12574   }
12575
12576   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12577   if (isCheaperToUseNegatedFPOps(N0, N1))
12578     return DAG.getNode(
12579         ISD::FDIV, SDLoc(N), VT,
12580         TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
12581         TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
12582
12583   return SDValue();
12584 }
12585
12586 SDValue DAGCombiner::visitFREM(SDNode *N) {
12587   SDValue N0 = N->getOperand(0);
12588   SDValue N1 = N->getOperand(1);
12589   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12590   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12591   EVT VT = N->getValueType(0);
12592
12593   // fold (frem c1, c2) -> fmod(c1,c2)
12594   if (N0CFP && N1CFP)
12595     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12596
12597   if (SDValue NewSel = foldBinOpIntoSelect(N))
12598     return NewSel;
12599
12600   return SDValue();
12601 }
12602
12603 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
12604   SDNodeFlags Flags = N->getFlags();
12605   if (!DAG.getTarget().Options.UnsafeFPMath &&
12606       !Flags.hasApproximateFuncs())
12607     return SDValue();
12608
12609   SDValue N0 = N->getOperand(0);
12610   if (TLI.isFsqrtCheap(N0, DAG))
12611     return SDValue();
12612
12613   // FSQRT nodes have flags that propagate to the created nodes.
12614   return buildSqrtEstimate(N0, Flags);
12615 }
12616
12617 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12618 /// copysign(x, fp_round(y)) -> copysign(x, y)
12619 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12620   SDValue N1 = N->getOperand(1);
12621   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12622        N1.getOpcode() == ISD::FP_ROUND)) {
12623     // Do not optimize out type conversion of f128 type yet.
12624     // For some targets like x86_64, configuration is changed to keep one f128
12625     // value in one SSE register, but instruction selection cannot handle
12626     // FCOPYSIGN on SSE registers yet.
12627     EVT N1VT = N1->getValueType(0);
12628     EVT N1Op0VT = N1->getOperand(0).getValueType();
12629     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12630   }
12631   return false;
12632 }
12633
12634 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
12635   SDValue N0 = N->getOperand(0);
12636   SDValue N1 = N->getOperand(1);
12637   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12638   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12639   EVT VT = N->getValueType(0);
12640
12641   if (N0CFP && N1CFP) // Constant fold
12642     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12643
12644   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12645     const APFloat &V = N1C->getValueAPF();
12646     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
12647     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12648     if (!V.isNegative()) {
12649       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12650         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12651     } else {
12652       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12653         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12654                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12655     }
12656   }
12657
12658   // copysign(fabs(x), y) -> copysign(x, y)
12659   // copysign(fneg(x), y) -> copysign(x, y)
12660   // copysign(copysign(x,z), y) -> copysign(x, y)
12661   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12662       N0.getOpcode() == ISD::FCOPYSIGN)
12663     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12664
12665   // copysign(x, abs(y)) -> abs(x)
12666   if (N1.getOpcode() == ISD::FABS)
12667     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12668
12669   // copysign(x, copysign(y,z)) -> copysign(x, z)
12670   if (N1.getOpcode() == ISD::FCOPYSIGN)
12671     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12672
12673   // copysign(x, fp_extend(y)) -> copysign(x, y)
12674   // copysign(x, fp_round(y)) -> copysign(x, y)
12675   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12676     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12677
12678   return SDValue();
12679 }
12680
12681 SDValue DAGCombiner::visitFPOW(SDNode *N) {
12682   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12683   if (!ExponentC)
12684     return SDValue();
12685
12686   // Try to convert x ** (1/3) into cube root.
12687   // TODO: Handle the various flavors of long double.
12688   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12689   //       Some range near 1/3 should be fine.
12690   EVT VT = N->getValueType(0);
12691   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12692       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12693     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12694     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12695     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
12696     // For regular numbers, rounding may cause the results to differ.
12697     // Therefore, we require { nsz ninf nnan afn } for this transform.
12698     // TODO: We could select out the special cases if we don't have nsz/ninf.
12699     SDNodeFlags Flags = N->getFlags();
12700     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12701         !Flags.hasApproximateFuncs())
12702       return SDValue();
12703
12704     // Do not create a cbrt() libcall if the target does not have it, and do not
12705     // turn a pow that has lowering support into a cbrt() libcall.
12706     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12707         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12708          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12709       return SDValue();
12710
12711     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12712   }
12713
12714   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12715   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12716   // TODO: This could be extended (using a target hook) to handle smaller
12717   // power-of-2 fractional exponents.
12718   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12719   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12720   if (ExponentIs025 || ExponentIs075) {
12721     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12722     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12723     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12724     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12725     // For regular numbers, rounding may cause the results to differ.
12726     // Therefore, we require { nsz ninf afn } for this transform.
12727     // TODO: We could select out the special cases if we don't have nsz/ninf.
12728     SDNodeFlags Flags = N->getFlags();
12729
12730     // We only need no signed zeros for the 0.25 case.
12731     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12732         !Flags.hasApproximateFuncs())
12733       return SDValue();
12734
12735     // Don't double the number of libcalls. We are trying to inline fast code.
12736     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12737       return SDValue();
12738
12739     // Assume that libcalls are the smallest code.
12740     // TODO: This restriction should probably be lifted for vectors.
12741     if (DAG.getMachineFunction().getFunction().hasOptSize())
12742       return SDValue();
12743
12744     // pow(X, 0.25) --> sqrt(sqrt(X))
12745     SDLoc DL(N);
12746     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12747     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12748     if (ExponentIs025)
12749       return SqrtSqrt;
12750     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12751     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12752   }
12753
12754   return SDValue();
12755 }
12756
12757 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12758                                const TargetLowering &TLI) {
12759   // This optimization is guarded by a function attribute because it may produce
12760   // unexpected results. Ie, programs may be relying on the platform-specific
12761   // undefined behavior when the float-to-int conversion overflows.
12762   const Function &F = DAG.getMachineFunction().getFunction();
12763   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12764   if (StrictOverflow.getValueAsString().equals("false"))
12765     return SDValue();
12766
12767   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12768   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12769   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12770   // conversions would return +0.0.
12771   // FIXME: We should be able to use node-level FMF here.
12772   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12773   EVT VT = N->getValueType(0);
12774   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12775       !DAG.getTarget().Options.NoSignedZerosFPMath)
12776     return SDValue();
12777
12778   // fptosi/fptoui round towards zero, so converting from FP to integer and
12779   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12780   SDValue N0 = N->getOperand(0);
12781   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12782       N0.getOperand(0).getValueType() == VT)
12783     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12784
12785   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12786       N0.getOperand(0).getValueType() == VT)
12787     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12788
12789   return SDValue();
12790 }
12791
12792 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
12793   SDValue N0 = N->getOperand(0);
12794   EVT VT = N->getValueType(0);
12795   EVT OpVT = N0.getValueType();
12796
12797   // [us]itofp(undef) = 0, because the result value is bounded.
12798   if (N0.isUndef())
12799     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12800
12801   // fold (sint_to_fp c1) -> c1fp
12802   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12803       // ...but only if the target supports immediate floating-point values
12804       (!LegalOperations ||
12805        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12806     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12807
12808   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12809   // but UINT_TO_FP is legal on this target, try to convert.
12810   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12811       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12812     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12813     if (DAG.SignBitIsZero(N0))
12814       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12815   }
12816
12817   // The next optimizations are desirable only if SELECT_CC can be lowered.
12818   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12819     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12820     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12821         !VT.isVector() &&
12822         (!LegalOperations ||
12823          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12824       SDLoc DL(N);
12825       SDValue Ops[] =
12826         { N0.getOperand(0), N0.getOperand(1),
12827           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12828           N0.getOperand(2) };
12829       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12830     }
12831
12832     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12833     //      (select_cc x, y, 1.0, 0.0,, cc)
12834     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12835         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12836         (!LegalOperations ||
12837          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12838       SDLoc DL(N);
12839       SDValue Ops[] =
12840         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12841           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12842           N0.getOperand(0).getOperand(2) };
12843       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12844     }
12845   }
12846
12847   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12848     return FTrunc;
12849
12850   return SDValue();
12851 }
12852
12853 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
12854   SDValue N0 = N->getOperand(0);
12855   EVT VT = N->getValueType(0);
12856   EVT OpVT = N0.getValueType();
12857
12858   // [us]itofp(undef) = 0, because the result value is bounded.
12859   if (N0.isUndef())
12860     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12861
12862   // fold (uint_to_fp c1) -> c1fp
12863   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12864       // ...but only if the target supports immediate floating-point values
12865       (!LegalOperations ||
12866        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12867     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12868
12869   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12870   // but SINT_TO_FP is legal on this target, try to convert.
12871   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12872       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12873     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12874     if (DAG.SignBitIsZero(N0))
12875       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12876   }
12877
12878   // The next optimizations are desirable only if SELECT_CC can be lowered.
12879   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12880     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
12881     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12882         (!LegalOperations ||
12883          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12884       SDLoc DL(N);
12885       SDValue Ops[] =
12886         { N0.getOperand(0), N0.getOperand(1),
12887           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12888           N0.getOperand(2) };
12889       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12890     }
12891   }
12892
12893   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12894     return FTrunc;
12895
12896   return SDValue();
12897 }
12898
12899 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
12900 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
12901   SDValue N0 = N->getOperand(0);
12902   EVT VT = N->getValueType(0);
12903
12904   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
12905     return SDValue();
12906
12907   SDValue Src = N0.getOperand(0);
12908   EVT SrcVT = Src.getValueType();
12909   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
12910   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
12911
12912   // We can safely assume the conversion won't overflow the output range,
12913   // because (for example) (uint8_t)18293.f is undefined behavior.
12914
12915   // Since we can assume the conversion won't overflow, our decision as to
12916   // whether the input will fit in the float should depend on the minimum
12917   // of the input range and output range.
12918
12919   // This means this is also safe for a signed input and unsigned output, since
12920   // a negative input would lead to undefined behavior.
12921   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
12922   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
12923   unsigned ActualSize = std::min(InputSize, OutputSize);
12924   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
12925
12926   // We can only fold away the float conversion if the input range can be
12927   // represented exactly in the float range.
12928   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
12929     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
12930       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
12931                                                        : ISD::ZERO_EXTEND;
12932       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
12933     }
12934     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
12935       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
12936     return DAG.getBitcast(VT, Src);
12937   }
12938   return SDValue();
12939 }
12940
12941 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
12942   SDValue N0 = N->getOperand(0);
12943   EVT VT = N->getValueType(0);
12944
12945   // fold (fp_to_sint undef) -> undef
12946   if (N0.isUndef())
12947     return DAG.getUNDEF(VT);
12948
12949   // fold (fp_to_sint c1fp) -> c1
12950   if (isConstantFPBuildVectorOrConstantFP(N0))
12951     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
12952
12953   return FoldIntToFPToInt(N, DAG);
12954 }
12955
12956 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
12957   SDValue N0 = N->getOperand(0);
12958   EVT VT = N->getValueType(0);
12959
12960   // fold (fp_to_uint undef) -> undef
12961   if (N0.isUndef())
12962     return DAG.getUNDEF(VT);
12963
12964   // fold (fp_to_uint c1fp) -> c1
12965   if (isConstantFPBuildVectorOrConstantFP(N0))
12966     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
12967
12968   return FoldIntToFPToInt(N, DAG);
12969 }
12970
12971 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
12972   SDValue N0 = N->getOperand(0);
12973   SDValue N1 = N->getOperand(1);
12974   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12975   EVT VT = N->getValueType(0);
12976
12977   // fold (fp_round c1fp) -> c1fp
12978   if (N0CFP)
12979     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
12980
12981   // fold (fp_round (fp_extend x)) -> x
12982   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
12983     return N0.getOperand(0);
12984
12985   // fold (fp_round (fp_round x)) -> (fp_round x)
12986   if (N0.getOpcode() == ISD::FP_ROUND) {
12987     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
12988     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
12989
12990     // Skip this folding if it results in an fp_round from f80 to f16.
12991     //
12992     // f80 to f16 always generates an expensive (and as yet, unimplemented)
12993     // libcall to __truncxfhf2 instead of selecting native f16 conversion
12994     // instructions from f32 or f64.  Moreover, the first (value-preserving)
12995     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
12996     // x86.
12997     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
12998       return SDValue();
12999
13000     // If the first fp_round isn't a value preserving truncation, it might
13001     // introduce a tie in the second fp_round, that wouldn't occur in the
13002     // single-step fp_round we want to fold to.
13003     // In other words, double rounding isn't the same as rounding.
13004     // Also, this is a value preserving truncation iff both fp_round's are.
13005     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13006       SDLoc DL(N);
13007       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13008                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13009     }
13010   }
13011
13012   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
13013   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13014     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13015                               N0.getOperand(0), N1);
13016     AddToWorklist(Tmp.getNode());
13017     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13018                        Tmp, N0.getOperand(1));
13019   }
13020
13021   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13022     return NewVSel;
13023
13024   return SDValue();
13025 }
13026
13027 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
13028   SDValue N0 = N->getOperand(0);
13029   EVT VT = N->getValueType(0);
13030
13031   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13032   if (N->hasOneUse() &&
13033       N->use_begin()->getOpcode() == ISD::FP_ROUND)
13034     return SDValue();
13035
13036   // fold (fp_extend c1fp) -> c1fp
13037   if (isConstantFPBuildVectorOrConstantFP(N0))
13038     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13039
13040   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
13041   if (N0.getOpcode() == ISD::FP16_TO_FP &&
13042       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13043     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13044
13045   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
13046   // value of X.
13047   if (N0.getOpcode() == ISD::FP_ROUND
13048       && N0.getConstantOperandVal(1) == 1) {
13049     SDValue In = N0.getOperand(0);
13050     if (In.getValueType() == VT) return In;
13051     if (VT.bitsLT(In.getValueType()))
13052       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13053                          In, N0.getOperand(1));
13054     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13055   }
13056
13057   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
13058   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13059        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13060     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13061     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13062                                      LN0->getChain(),
13063                                      LN0->getBasePtr(), N0.getValueType(),
13064                                      LN0->getMemOperand());
13065     CombineTo(N, ExtLoad);
13066     CombineTo(N0.getNode(),
13067               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13068                           N0.getValueType(), ExtLoad,
13069                           DAG.getIntPtrConstant(1, SDLoc(N0))),
13070               ExtLoad.getValue(1));
13071     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13072   }
13073
13074   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13075     return NewVSel;
13076
13077   return SDValue();
13078 }
13079
13080 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
13081   SDValue N0 = N->getOperand(0);
13082   EVT VT = N->getValueType(0);
13083
13084   // fold (fceil c1) -> fceil(c1)
13085   if (isConstantFPBuildVectorOrConstantFP(N0))
13086     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13087
13088   return SDValue();
13089 }
13090
13091 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
13092   SDValue N0 = N->getOperand(0);
13093   EVT VT = N->getValueType(0);
13094
13095   // fold (ftrunc c1) -> ftrunc(c1)
13096   if (isConstantFPBuildVectorOrConstantFP(N0))
13097     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13098
13099   // fold ftrunc (known rounded int x) -> x
13100   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13101   // likely to be generated to extract integer from a rounded floating value.
13102   switch (N0.getOpcode()) {
13103   default: break;
13104   case ISD::FRINT:
13105   case ISD::FTRUNC:
13106   case ISD::FNEARBYINT:
13107   case ISD::FFLOOR:
13108   case ISD::FCEIL:
13109     return N0;
13110   }
13111
13112   return SDValue();
13113 }
13114
13115 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
13116   SDValue N0 = N->getOperand(0);
13117   EVT VT = N->getValueType(0);
13118
13119   // fold (ffloor c1) -> ffloor(c1)
13120   if (isConstantFPBuildVectorOrConstantFP(N0))
13121     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13122
13123   return SDValue();
13124 }
13125
13126 // FIXME: FNEG and FABS have a lot in common; refactor.
13127 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13128   SDValue N0 = N->getOperand(0);
13129   EVT VT = N->getValueType(0);
13130
13131   // Constant fold FNEG.
13132   if (isConstantFPBuildVectorOrConstantFP(N0))
13133     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13134
13135   if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
13136     return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13137
13138   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13139   // constant pool values.
13140   if (!TLI.isFNegFree(VT) &&
13141       N0.getOpcode() == ISD::BITCAST &&
13142       N0.getNode()->hasOneUse()) {
13143     SDValue Int = N0.getOperand(0);
13144     EVT IntVT = Int.getValueType();
13145     if (IntVT.isInteger() && !IntVT.isVector()) {
13146       APInt SignMask;
13147       if (N0.getValueType().isVector()) {
13148         // For a vector, get a mask such as 0x80... per scalar element
13149         // and splat it.
13150         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13151         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13152       } else {
13153         // For a scalar, just generate 0x80...
13154         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13155       }
13156       SDLoc DL0(N0);
13157       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13158                         DAG.getConstant(SignMask, DL0, IntVT));
13159       AddToWorklist(Int.getNode());
13160       return DAG.getBitcast(VT, Int);
13161     }
13162   }
13163
13164   // (fneg (fmul c, x)) -> (fmul -c, x)
13165   if (N0.getOpcode() == ISD::FMUL &&
13166       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13167     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13168     if (CFP1) {
13169       APFloat CVal = CFP1->getValueAPF();
13170       CVal.changeSign();
13171       if (Level >= AfterLegalizeDAG &&
13172           (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13173            TLI.isOperationLegal(ISD::ConstantFP, VT)))
13174         return DAG.getNode(
13175             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13176             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13177             N0->getFlags());
13178     }
13179   }
13180
13181   return SDValue();
13182 }
13183
13184 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13185                             APFloat (*Op)(const APFloat &, const APFloat &)) {
13186   SDValue N0 = N->getOperand(0);
13187   SDValue N1 = N->getOperand(1);
13188   EVT VT = N->getValueType(0);
13189   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13190   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13191
13192   if (N0CFP && N1CFP) {
13193     const APFloat &C0 = N0CFP->getValueAPF();
13194     const APFloat &C1 = N1CFP->getValueAPF();
13195     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13196   }
13197
13198   // Canonicalize to constant on RHS.
13199   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13200       !isConstantFPBuildVectorOrConstantFP(N1))
13201     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13202
13203   return SDValue();
13204 }
13205
13206 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
13207   return visitFMinMax(DAG, N, minnum);
13208 }
13209
13210 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
13211   return visitFMinMax(DAG, N, maxnum);
13212 }
13213
13214 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
13215   return visitFMinMax(DAG, N, minimum);
13216 }
13217
13218 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
13219   return visitFMinMax(DAG, N, maximum);
13220 }
13221
13222 SDValue DAGCombiner::visitFABS(SDNode *N) {
13223   SDValue N0 = N->getOperand(0);
13224   EVT VT = N->getValueType(0);
13225
13226   // fold (fabs c1) -> fabs(c1)
13227   if (isConstantFPBuildVectorOrConstantFP(N0))
13228     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13229
13230   // fold (fabs (fabs x)) -> (fabs x)
13231   if (N0.getOpcode() == ISD::FABS)
13232     return N->getOperand(0);
13233
13234   // fold (fabs (fneg x)) -> (fabs x)
13235   // fold (fabs (fcopysign x, y)) -> (fabs x)
13236   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13237     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13238
13239   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13240   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13241     SDValue Int = N0.getOperand(0);
13242     EVT IntVT = Int.getValueType();
13243     if (IntVT.isInteger() && !IntVT.isVector()) {
13244       APInt SignMask;
13245       if (N0.getValueType().isVector()) {
13246         // For a vector, get a mask such as 0x7f... per scalar element
13247         // and splat it.
13248         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13249         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13250       } else {
13251         // For a scalar, just generate 0x7f...
13252         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13253       }
13254       SDLoc DL(N0);
13255       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13256                         DAG.getConstant(SignMask, DL, IntVT));
13257       AddToWorklist(Int.getNode());
13258       return DAG.getBitcast(N->getValueType(0), Int);
13259     }
13260   }
13261
13262   return SDValue();
13263 }
13264
13265 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
13266   SDValue Chain = N->getOperand(0);
13267   SDValue N1 = N->getOperand(1);
13268   SDValue N2 = N->getOperand(2);
13269
13270   // If N is a constant we could fold this into a fallthrough or unconditional
13271   // branch. However that doesn't happen very often in normal code, because
13272   // Instcombine/SimplifyCFG should have handled the available opportunities.
13273   // If we did this folding here, it would be necessary to update the
13274   // MachineBasicBlock CFG, which is awkward.
13275
13276   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13277   // on the target.
13278   if (N1.getOpcode() == ISD::SETCC &&
13279       TLI.isOperationLegalOrCustom(ISD::BR_CC,
13280                                    N1.getOperand(0).getValueType())) {
13281     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13282                        Chain, N1.getOperand(2),
13283                        N1.getOperand(0), N1.getOperand(1), N2);
13284   }
13285
13286   if (N1.hasOneUse()) {
13287     if (SDValue NewN1 = rebuildSetCC(N1))
13288       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13289   }
13290
13291   return SDValue();
13292 }
13293
13294 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
13295   if (N.getOpcode() == ISD::SRL ||
13296       (N.getOpcode() == ISD::TRUNCATE &&
13297        (N.getOperand(0).hasOneUse() &&
13298         N.getOperand(0).getOpcode() == ISD::SRL))) {
13299     // Look pass the truncate.
13300     if (N.getOpcode() == ISD::TRUNCATE)
13301       N = N.getOperand(0);
13302
13303     // Match this pattern so that we can generate simpler code:
13304     //
13305     //   %a = ...
13306     //   %b = and i32 %a, 2
13307     //   %c = srl i32 %b, 1
13308     //   brcond i32 %c ...
13309     //
13310     // into
13311     //
13312     //   %a = ...
13313     //   %b = and i32 %a, 2
13314     //   %c = setcc eq %b, 0
13315     //   brcond %c ...
13316     //
13317     // This applies only when the AND constant value has one bit set and the
13318     // SRL constant is equal to the log2 of the AND constant. The back-end is
13319     // smart enough to convert the result into a TEST/JMP sequence.
13320     SDValue Op0 = N.getOperand(0);
13321     SDValue Op1 = N.getOperand(1);
13322
13323     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13324       SDValue AndOp1 = Op0.getOperand(1);
13325
13326       if (AndOp1.getOpcode() == ISD::Constant) {
13327         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13328
13329         if (AndConst.isPowerOf2() &&
13330             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13331           SDLoc DL(N);
13332           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13333                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13334                               ISD::SETNE);
13335         }
13336       }
13337     }
13338   }
13339
13340   // Transform br(xor(x, y)) -> br(x != y)
13341   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13342   if (N.getOpcode() == ISD::XOR) {
13343     // Because we may call this on a speculatively constructed
13344     // SimplifiedSetCC Node, we need to simplify this node first.
13345     // Ideally this should be folded into SimplifySetCC and not
13346     // here. For now, grab a handle to N so we don't lose it from
13347     // replacements interal to the visit.
13348     HandleSDNode XORHandle(N);
13349     while (N.getOpcode() == ISD::XOR) {
13350       SDValue Tmp = visitXOR(N.getNode());
13351       // No simplification done.
13352       if (!Tmp.getNode())
13353         break;
13354       // Returning N is form in-visit replacement that may invalidated
13355       // N. Grab value from Handle.
13356       if (Tmp.getNode() == N.getNode())
13357         N = XORHandle.getValue();
13358       else // Node simplified. Try simplifying again.
13359         N = Tmp;
13360     }
13361
13362     if (N.getOpcode() != ISD::XOR)
13363       return N;
13364
13365     SDNode *TheXor = N.getNode();
13366
13367     SDValue Op0 = TheXor->getOperand(0);
13368     SDValue Op1 = TheXor->getOperand(1);
13369
13370     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13371       bool Equal = false;
13372       if (isOneConstant(Op0) && Op0.hasOneUse() &&
13373           Op0.getOpcode() == ISD::XOR) {
13374         TheXor = Op0.getNode();
13375         Equal = true;
13376       }
13377
13378       EVT SetCCVT = N.getValueType();
13379       if (LegalTypes)
13380         SetCCVT = getSetCCResultType(SetCCVT);
13381       // Replace the uses of XOR with SETCC
13382       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13383                           Equal ? ISD::SETEQ : ISD::SETNE);
13384     }
13385   }
13386
13387   return SDValue();
13388 }
13389
13390 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13391 //
13392 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13393   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13394   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13395
13396   // If N is a constant we could fold this into a fallthrough or unconditional
13397   // branch. However that doesn't happen very often in normal code, because
13398   // Instcombine/SimplifyCFG should have handled the available opportunities.
13399   // If we did this folding here, it would be necessary to update the
13400   // MachineBasicBlock CFG, which is awkward.
13401
13402   // Use SimplifySetCC to simplify SETCC's.
13403   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13404                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13405                                false);
13406   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13407
13408   // fold to a simpler setcc
13409   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13410     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13411                        N->getOperand(0), Simp.getOperand(2),
13412                        Simp.getOperand(0), Simp.getOperand(1),
13413                        N->getOperand(4));
13414
13415   return SDValue();
13416 }
13417
13418 /// Return true if 'Use' is a load or a store that uses N as its base pointer
13419 /// and that N may be folded in the load / store addressing mode.
13420 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13421                                     SelectionDAG &DAG,
13422                                     const TargetLowering &TLI) {
13423   EVT VT;
13424   unsigned AS;
13425
13426   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
13427     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13428       return false;
13429     VT = LD->getMemoryVT();
13430     AS = LD->getAddressSpace();
13431   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
13432     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13433       return false;
13434     VT = ST->getMemoryVT();
13435     AS = ST->getAddressSpace();
13436   } else
13437     return false;
13438
13439   TargetLowering::AddrMode AM;
13440   if (N->getOpcode() == ISD::ADD) {
13441     AM.HasBaseReg = true;
13442     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13443     if (Offset)
13444       // [reg +/- imm]
13445       AM.BaseOffs = Offset->getSExtValue();
13446     else
13447       // [reg +/- reg]
13448       AM.Scale = 1;
13449   } else if (N->getOpcode() == ISD::SUB) {
13450     AM.HasBaseReg = true;
13451     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13452     if (Offset)
13453       // [reg +/- imm]
13454       AM.BaseOffs = -Offset->getSExtValue();
13455     else
13456       // [reg +/- reg]
13457       AM.Scale = 1;
13458   } else
13459     return false;
13460
13461   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13462                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13463 }
13464
13465 /// Try turning a load/store into a pre-indexed load/store when the base
13466 /// pointer is an add or subtract and it has other uses besides the load/store.
13467 /// After the transformation, the new indexed load/store has effectively folded
13468 /// the add/subtract in and all of its other uses are redirected to the
13469 /// new load/store.
      ///
      /// \param N the candidate node; anything that is not an unindexed load or
      ///          store is rejected.
      /// \returns true if N was replaced by an indexed load/store (N and its old
      ///          address computation are deleted in that case), false if no
      ///          combine was performed.
13470 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
      // Only attempted once the DAG has been legalized.
13471   if (Level < AfterLegalizeDAG)
13472     return false;
13473
      // Classify N, and bail out unless the target has a legal pre-inc or
      // pre-dec form for the memory type being accessed.
13474   bool isLoad = true;
13475   SDValue Ptr;
13476   EVT VT;
13477   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13478     if (LD->isIndexed())
13479       return false;
13480     VT = LD->getMemoryVT();
13481     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13482         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13483       return false;
13484     Ptr = LD->getBasePtr();
13485   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13486     if (ST->isIndexed())
13487       return false;
13488     VT = ST->getMemoryVT();
13489     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13490         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13491       return false;
13492     Ptr = ST->getBasePtr();
13493     isLoad = false;
13494   } else {
13495     return false;
13496   }
13497
13498   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13499   // out.  There is no reason to make this a preinc/predec.
13500   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13501       Ptr.getNode()->hasOneUse())
13502     return false;
13503
13504   // Ask the target to do addressing mode selection.
13505   SDValue BasePtr;
13506   SDValue Offset;
13507   ISD::MemIndexedMode AM = ISD::UNINDEXED;
13508   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13509     return false;
13510
13511   // Backends without true r+i pre-indexed forms may need to pass a
13512   // constant base with a variable offset so that constant coercion
13513   // will work with the patterns in canonical form.
      // While Swapped is set, BasePtr names the non-constant operand and Offset
      // the constant one; the swap is undone before the indexed node is built.
13514   bool Swapped = false;
13515   if (isa<ConstantSDNode>(BasePtr)) {
13516     std::swap(BasePtr, Offset);
13517     Swapped = true;
13518   }
13519
13520   // Don't create a indexed load / store with zero offset.
13521   if (isNullConstant(Offset))
13522     return false;
13523
13524   // Try turning it into a pre-indexed load / store except when:
13525   // 1) The new base ptr is a frame index.
13526   // 2) If N is a store and the new base ptr is either the same as or is a
13527   //    predecessor of the value being stored.
13528   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13529   //    that would create a cycle.
13530   // 4) All uses are load / store ops that use it as old base ptr.
13531
13532   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
13533   // (plus the implicit offset) to a register to preinc anyway.
      // Note that a RegisterSDNode base is rejected here as well.
13534   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13535     return false;
13536
13537   // Check #2.
13538   if (!isLoad) {
13539     SDValue Val = cast<StoreSDNode>(N)->getValue();
13540
13541     // Would require a copy.
13542     if (Val == BasePtr)
13543       return false;
13544
13545     // Would create a cycle.
13546     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13547       return false;
13548   }
13549
13550   // Caches for hasPredecessorHelper.
      // The search is seeded with N, and the same Visited/Worklist pair is
      // passed to every hasPredecessorHelper call in this function.
13551   SmallPtrSet<const SDNode *, 32> Visited;
13552   SmallVector<const SDNode *, 16> Worklist;
13553   Worklist.push_back(N);
13554
13555   // If the offset is a constant, there may be other adds of constants that
13556   // can be folded with this one. We should do this to avoid having to keep
13557   // a copy of the original base pointer.
13558   SmallVector<SDNode *, 16> OtherUses;
13559   if (isa<ConstantSDNode>(Offset))
13560     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13561                               UE = BasePtr.getNode()->use_end();
13562          UI != UE; ++UI) {
13563       SDUse &Use = UI.getUse();
13564       // Skip the use that is Ptr and uses of other results from BasePtr's
13565       // node (important for nodes that return multiple results).
13566       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13567         continue;
13568
            // Users for which hasPredecessorHelper reports true are left as they
            // are.
13569       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13570         continue;
13571
            // Any remaining user that is not an ADD/SUB of BasePtr with a
            // constant of Offset's type cannot be rewritten. In that case no
            // other use is rewritten at all (OtherUses is emptied), although the
            // load/store itself may still be converted below.
13572       if (Use.getUser()->getOpcode() != ISD::ADD &&
13573           Use.getUser()->getOpcode() != ISD::SUB) {
13574         OtherUses.clear();
13575         break;
13576       }
13577
            // Fetch the operand of the binary user that is not this use
            // (operand index 0 <-> 1).
13578       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13579       if (!isa<ConstantSDNode>(Op1)) {
13580         OtherUses.clear();
13581         break;
13582       }
13583
13584       // FIXME: In some cases, we can be smarter about this.
13585       if (Op1.getValueType() != Offset.getValueType()) {
13586         OtherUses.clear();
13587         break;
13588       }
13589
13590       OtherUses.push_back(Use.getUser());
13591     }
13592
      // Undo the earlier swap so that BasePtr/Offset are again in the order the
      // target returned them, which is the order used to build the indexed node.
13593   if (Swapped)
13594     std::swap(BasePtr, Offset);
13595
13596   // Now check for #3 and #4.
13597   bool RealUse = false;
13598
13599   for (SDNode *Use : Ptr.getNode()->uses()) {
13600     if (Use == N)
13601       continue;
13602     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13603       return false;
13604
13605     // If Ptr may be folded in addressing mode of other use, then it's
13606     // not profitable to do this transformation.
13607     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13608       RealUse = true;
13609   }
13610
13611   if (!RealUse)
13612     return false;
13613
      // All checks passed: build the indexed node and move N's users over to it.
13614   SDValue Result;
13615   if (isLoad)
13616     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13617                                 BasePtr, Offset, AM);
13618   else
13619     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13620                                  BasePtr, Offset, AM);
13621   ++PreIndexedNodes;
13622   ++NodesCombined;
13623   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13624              Result.getNode()->dump(&DAG); dbgs() << '\n');
13625   WorklistRemover DeadNodes(*this);
      // Result numbering as used here: for a load, results 0 and 2 of the
      // indexed node replace results 0 and 1 of N, and result 1 is what later
      // replaces Ptr; for a store, result 1 replaces N's result 0 and result 0
      // is what later replaces Ptr.
13626   if (isLoad) {
13627     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13628     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13629   } else {
13630     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13631   }
13632
13633   // Finally, since the node is now dead, remove it from the graph.
13634   deleteAndRecombine(N);
13635
      // Swap once more so that BasePtr/Offset match the order that was in
      // effect while OtherUses was collected (Offset is the constant operand).
13636   if (Swapped)
13637     std::swap(BasePtr, Offset);
13638
13639   // Replace other uses of BasePtr that can be updated to use Ptr
13640   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
          // OffsetIdx is the operand index of the constant in this user; the
          // other operand is BasePtr.
13641     unsigned OffsetIdx = 1;
13642     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13643       OffsetIdx = 0;
13644     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13645            BasePtr.getNode() && "Expected BasePtr operand");
13646
13647     // We need to replace ptr0 in the following expression:
13648     //   x0 * offset0 + y0 * ptr0 = t0
13649     // knowing that
13650     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13651     //
13652     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13653     // indexed load/store and the expression that needs to be re-written.
13654     //
13655     // Therefore, we have:
13656     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
13657
13658     ConstantSDNode *CN =
13659       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13660     int X0, X1, Y0, Y1;
13661     const APInt &Offset0 = CN->getAPIntValue();
13662     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13663
          // A sign is -1 when the corresponding term is the subtracted operand:
          // X0/Y0 for the SUB user being rewritten, X1/Y1 for a PRE_DEC indexed
          // node (which term that is depends on Swapped).
13664     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13665     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13666     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13667     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13668
          // The (y0 * y1) * t1 term decides between adding and subtracting t1.
13669     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13670
          // CNV = x0 * offset0 - x1 * y0 * y1 * offset1
13671     APInt CNV = Offset0;
13672     if (X0 < 0) CNV = -CNV;
13673     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13674     else CNV = CNV - Offset1;
13675
13676     SDLoc DL(OtherUses[i]);
13677
13678     // We can now generate the new expression.
13679     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13680     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13681
13682     SDValue NewUse = DAG.getNode(Opcode,
13683                                  DL,
13684                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13685     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13686     deleteAndRecombine(OtherUses[i]);
13687   }
13688
13689   // Replace the uses of Ptr with uses of the updated base value.
13690   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13691   deleteAndRecombine(Ptr.getNode());
13692   AddToWorklist(Result.getNode());
13693
13694   return true;
13695 }
13696
13697 /// Try to combine a load/store with a add/sub of the base pointer node into a
13698 /// post-indexed load/store. The transformation folded the add/subtract into the
13699 /// new indexed load/store effectively and all of its uses are redirected to the
13700 /// new load/store.
13701 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13702   if (Level < AfterLegalizeDAG)
13703     return false;
13704
13705   bool isLoad = true;
13706   SDValue Ptr;
13707   EVT VT;
13708   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13709     if (LD->isIndexed())
13710       return false;
13711     VT = LD->getMemoryVT();
13712     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13713         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13714       return false;
13715     Ptr = LD->getBasePtr();
13716   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13717     if (ST->isIndexed())
13718       return false;
13719     VT = ST->getMemoryVT();
13720     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13721         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13722       return false;
13723     Ptr = ST->getBasePtr();
13724     isLoad = false;
13725   } else {
13726     return false;
13727   }
13728
13729   if (Ptr.getNode()->hasOneUse())
13730     return false;
13731
13732   for (SDNode *Op : Ptr.getNode()->uses()) {
13733     if (Op == N ||
13734         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13735       continue;
13736
13737     SDValue BasePtr;
13738     SDValue Offset;
13739     ISD::MemIndexedMode AM = ISD::UNINDEXED;
13740     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13741       // Don't create a indexed load / store with zero offset.
13742       if (isNullConstant(Offset))
13743         continue;
13744
13745       // Try turning it into a post-indexed load / store except when
13746       // 1) All uses are load / store ops that use it as base ptr (and
13747       //    it may be folded as addressing mmode).
13748       // 2) Op must be independent of N, i.e. Op is neither a predecessor
13749       //    nor a successor of N. Otherwise, if Op is folded that would
13750       //    create a cycle.
13751
13752       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13753         continue;
13754
13755       // Check for #1.
13756       bool TryNext = false;
13757       for (SDNode *Use : BasePtr.getNode()->uses()) {
13758         if (Use == Ptr.getNode())
13759           continue;
13760
13761         // If all the uses are load / store addresses, then don't do the
13762         // transformation.
13763         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13764           bool RealUse = false;
13765           for (SDNode *UseUse : Use->uses()) {
13766             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13767               RealUse = true;
13768           }
13769
13770           if (!RealUse) {
13771             TryNext = true;
13772             break;
13773           }
13774         }
13775       }
13776
13777       if (TryNext)
13778         continue;
13779
13780       // Check for #2.
13781       SmallPtrSet<const SDNode *, 32> Visited;
13782       SmallVector<const SDNode *, 8> Worklist;
13783       // Ptr is predecessor to both N and Op.
13784       Visited.insert(Ptr.getNode());
13785       Worklist.push_back(N);
13786       Worklist.push_back(Op);
13787       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13788           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13789         SDValue Result = isLoad
13790           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13791                                BasePtr, Offset, AM)
13792           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13793                                 BasePtr, Offset, AM);
13794         ++PostIndexedNodes;
13795         ++NodesCombined;
13796         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13797                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13798                    dbgs() << '\n');
13799         WorklistRemover DeadNodes(*this);
13800         if (isLoad) {
13801           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13802           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13803         } else {
13804           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13805         }
13806
13807         // Finally, since the node is now dead, remove it from the graph.
13808         deleteAndRecombine(N);
13809
13810         // Replace the uses of Use with uses of the updated base value.
13811         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13812                                       Result.getValue(isLoad ? 1 : 0));
13813         deleteAndRecombine(Op);
13814         return true;
13815       }
13816     }
13817   }
13818
13819   return false;
13820 }
13821
13822 /// Return the base-pointer arithmetic from an indexed \p LD.
13823 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13824   ISD::MemIndexedMode AM = LD->getAddressingMode();
13825   assert(AM != ISD::UNINDEXED);
13826   SDValue BP = LD->getOperand(1);
13827   SDValue Inc = LD->getOperand(2);
13828
13829   // Some backends use TargetConstants for load offsets, but don't expect
13830   // TargetConstants in general ADD nodes. We can convert these constants into
13831   // regular Constants (if the constant is not opaque).
13832   assert((Inc.getOpcode() != ISD::TargetConstant ||
13833           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13834          "Cannot split out indexing using opaque target constants");
13835   if (Inc.getOpcode() == ISD::TargetConstant) {
13836     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13837     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13838                           ConstInc->getValueType(0));
13839   }
13840
13841   unsigned Opc =
13842       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13843   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13844 }
13845
13846 static inline int numVectorEltsOrZero(EVT T) {
13847   return T.isVector() ? T.getVectorNumElements() : 0;
13848 }
13849
13850 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13851   Val = ST->getValue();
13852   EVT STType = Val.getValueType();
13853   EVT STMemType = ST->getMemoryVT();
13854   if (STType == STMemType)
13855     return true;
13856   if (isTypeLegal(STMemType))
13857     return false; // fail.
13858   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13859       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13860     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13861     return true;
13862   }
13863   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13864       STType.isInteger() && STMemType.isInteger()) {
13865     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13866     return true;
13867   }
13868   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13869     Val = DAG.getBitcast(STMemType, Val);
13870     return true;
13871   }
13872   return false; // fail.
13873 }
13874
13875 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
13876   EVT LDMemType = LD->getMemoryVT();
13877   EVT LDType = LD->getValueType(0);
13878   assert(Val.getValueType() == LDMemType &&
13879          "Attempting to extend value of non-matching type");
13880   if (LDType == LDMemType)
13881     return true;
13882   if (LDMemType.isInteger() && LDType.isInteger()) {
13883     switch (LD->getExtensionType()) {
13884     case ISD::NON_EXTLOAD:
13885       Val = DAG.getBitcast(LDType, Val);
13886       return true;
13887     case ISD::EXTLOAD:
13888       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
13889       return true;
13890     case ISD::SEXTLOAD:
13891       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
13892       return true;
13893     case ISD::ZEXTLOAD:
13894       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
13895       return true;
13896     }
13897   }
13898   return false;
13899 }
13900
13901 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
13902   if (OptLevel == CodeGenOpt::None || !LD->isSimple())
13903     return SDValue();
13904   SDValue Chain = LD->getOperand(0);
13905   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
13906   // TODO: Relax this restriction for unordered atomics (see D66309)
13907   if (!ST || !ST->isSimple())
13908     return SDValue();
13909
13910   EVT LDType = LD->getValueType(0);
13911   EVT LDMemType = LD->getMemoryVT();
13912   EVT STMemType = ST->getMemoryVT();
13913   EVT STType = ST->getValue().getValueType();
13914
13915   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
13916   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
13917   int64_t Offset;
13918   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
13919     return SDValue();
13920
13921   // Normalize for Endianness. After this Offset=0 will denote that the least
13922   // significant bit in the loaded value maps to the least significant bit in
13923   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
13924   // n:th least significant byte of the stored value.
13925   if (DAG.getDataLayout().isBigEndian())
13926     Offset = (STMemType.getStoreSizeInBits() -
13927               LDMemType.getStoreSizeInBits()) / 8 - Offset;
13928
13929   // Check that the stored value cover all bits that are loaded.
13930   bool STCoversLD =
13931       (Offset >= 0) &&
13932       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
13933
13934   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
13935     if (LD->isIndexed()) {
13936       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
13937                     LD->getAddressingMode() == ISD::POST_DEC);
13938       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
13939       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
13940                              LD->getOperand(1), LD->getOperand(2));
13941       SDValue Ops[] = {Val, Idx, Chain};
13942       return CombineTo(LD, Ops, 3);
13943     }
13944     return CombineTo(LD, Val, Chain);
13945   };
13946
13947   if (!STCoversLD)
13948     return SDValue();
13949
13950   // Memory as copy space (potentially masked).
13951   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
13952     // Simple case: Direct non-truncating forwarding
13953     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
13954       return ReplaceLd(LD, ST->getValue(), Chain);
13955     // Can we model the truncate and extension with an and mask?
13956     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
13957         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
13958       // Mask to size of LDMemType
13959       auto Mask =
13960           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
13961                                                STMemType.getSizeInBits()),
13962                           SDLoc(ST), STType);
13963       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
13964       return ReplaceLd(LD, Val, Chain);
13965     }
13966   }
13967
13968   // TODO: Deal with nonzero offset.
13969   if (LD->getBasePtr().isUndef() || Offset != 0)
13970     return SDValue();
13971   // Model necessary truncations / extenstions.
13972   SDValue Val;
13973   // Truncate Value To Stored Memory Size.
13974   do {
13975     if (!getTruncatedStoreValue(ST, Val))
13976       continue;
13977     if (!isTypeLegal(LDMemType))
13978       continue;
13979     if (STMemType != LDMemType) {
13980       // TODO: Support vectors? This requires extract_subvector/bitcast.
13981       if (!STMemType.isVector() && !LDMemType.isVector() &&
13982           STMemType.isInteger() && LDMemType.isInteger())
13983         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
13984       else
13985         continue;
13986     }
13987     if (!extendLoadedValueToExtension(LD, Val))
13988       continue;
13989     return ReplaceLd(LD, Val, Chain);
13990   } while (false);
13991
13992   // On failure, cleanup dead nodes we may have created.
13993   if (Val->use_empty())
13994     deleteAndRecombine(Val.getNode());
13995   return SDValue();
13996 }
13997
13998 SDValue DAGCombiner::visitLOAD(SDNode *N) {
13999   LoadSDNode *LD  = cast<LoadSDNode>(N);
14000   SDValue Chain = LD->getChain();
14001   SDValue Ptr   = LD->getBasePtr();
14002
14003   // If load is not volatile and there are no uses of the loaded value (and
14004   // the updated indexed value in case of indexed loads), change uses of the
14005   // chain value into uses of the chain input (i.e. delete the dead load).
14006   // TODO: Allow this for unordered atomics (see D66309)
14007   if (LD->isSimple()) {
14008     if (N->getValueType(1) == MVT::Other) {
14009       // Unindexed loads.
14010       if (!N->hasAnyUseOfValue(0)) {
14011         // It's not safe to use the two value CombineTo variant here. e.g.
14012         // v1, chain2 = load chain1, loc
14013         // v2, chain3 = load chain2, loc
14014         // v3         = add v2, c
14015         // Now we replace use of chain2 with chain1.  This makes the second load
14016         // isomorphic to the one we are deleting, and thus makes this load live.
14017         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14018                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14019                    dbgs() << "\n");
14020         WorklistRemover DeadNodes(*this);
14021         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14022         AddUsersToWorklist(Chain.getNode());
14023         if (N->use_empty())
14024           deleteAndRecombine(N);
14025
14026         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14027       }
14028     } else {
14029       // Indexed loads.
14030       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14031
14032       // If this load has an opaque TargetConstant offset, then we cannot split
14033       // the indexing into an add/sub directly (that TargetConstant may not be
14034       // valid for a different type of node, and we cannot convert an opaque
14035       // target constant into a regular constant).
14036       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
14037                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
14038
14039       if (!N->hasAnyUseOfValue(0) &&
14040           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
14041         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14042         SDValue Index;
14043         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
14044           Index = SplitIndexingFromLoad(LD);
14045           // Try to fold the base pointer arithmetic into subsequent loads and
14046           // stores.
14047           AddUsersToWorklist(N);
14048         } else
14049           Index = DAG.getUNDEF(N->getValueType(1));
14050         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14051                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14052                    dbgs() << " and 2 other values\n");
14053         WorklistRemover DeadNodes(*this);
14054         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14055         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14056         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14057         deleteAndRecombine(N);
14058         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14059       }
14060     }
14061   }
14062
14063   // If this load is directly stored, replace the load value with the stored
14064   // value.
14065   if (auto V = ForwardStoreValueToDirectLoad(LD))
14066     return V;
14067
14068   // Try to infer better alignment information than the load already has.
14069   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
14070     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14071       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
14072         SDValue NewLoad = DAG.getExtLoad(
14073             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14074             LD->getPointerInfo(), LD->getMemoryVT(), Align,
14075             LD->getMemOperand()->getFlags(), LD->getAAInfo());
14076         // NewLoad will always be N as we are only refining the alignment
14077         assert(NewLoad.getNode() == N);
14078         (void)NewLoad;
14079       }
14080     }
14081   }
14082
14083   if (LD->isUnindexed()) {
14084     // Walk up chain skipping non-aliasing memory nodes.
14085     SDValue BetterChain = FindBetterChain(LD, Chain);
14086
14087     // If there is a better chain.
14088     if (Chain != BetterChain) {
14089       SDValue ReplLoad;
14090
14091       // Replace the chain to void dependency.
14092       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14093         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14094                                BetterChain, Ptr, LD->getMemOperand());
14095       } else {
14096         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14097                                   LD->getValueType(0),
14098                                   BetterChain, Ptr, LD->getMemoryVT(),
14099                                   LD->getMemOperand());
14100       }
14101
14102       // Create token factor to keep old chain connected.
14103       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14104                                   MVT::Other, Chain, ReplLoad.getValue(1));
14105
14106       // Replace uses with load result and token factor
14107       return CombineTo(N, ReplLoad.getValue(0), Token);
14108     }
14109   }
14110
14111   // Try transforming N to an indexed load.
14112   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14113     return SDValue(N, 0);
14114
14115   // Try to slice up N to more direct loads if the slices are mapped to
14116   // different register banks or pairing can take place.
14117   if (SliceUpLoad(N))
14118     return SDValue(N, 0);
14119
14120   return SDValue();
14121 }
14122
14123 namespace {
14124
14125 /// Helper structure used to slice a load in smaller loads.
14126 /// Basically a slice is obtained from the following sequence:
14127 /// Origin = load Ty1, Base
14128 /// Shift = srl Ty1 Origin, CstTy Amount
14129 /// Inst = trunc Shift to Ty2
14130 ///
14131 /// Then, it will be rewritten into:
14132 /// Slice = load SliceTy, Base + SliceOffset
14133 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14134 ///
14135 /// SliceTy is deduced from the number of bits that are actually used to
14136 /// build Inst.
14137 struct LoadedSlice {
14138   /// Helper structure used to compute the cost of a slice.
14139   struct Cost {
14140     /// Are we optimizing for code size.
14141     bool ForCodeSize = false;
14142
14143     /// Various cost.
14144     unsigned Loads = 0;
14145     unsigned Truncates = 0;
14146     unsigned CrossRegisterBanksCopies = 0;
14147     unsigned ZExts = 0;
14148     unsigned Shift = 0;
14149
14150     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14151
14152     /// Get the cost of one isolated slice.
14153     Cost(const LoadedSlice &LS, bool ForCodeSize)
14154         : ForCodeSize(ForCodeSize), Loads(1) {
14155       EVT TruncType = LS.Inst->getValueType(0);
14156       EVT LoadedType = LS.getLoadedType();
14157       if (TruncType != LoadedType &&
14158           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14159         ZExts = 1;
14160     }
14161
14162     /// Account for slicing gain in the current cost.
14163     /// Slicing provide a few gains like removing a shift or a
14164     /// truncate. This method allows to grow the cost of the original
14165     /// load with the gain from this slice.
14166     void addSliceGain(const LoadedSlice &LS) {
14167       // Each slice saves a truncate.
14168       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14169       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14170                               LS.Inst->getValueType(0)))
14171         ++Truncates;
14172       // If there is a shift amount, this slice gets rid of it.
14173       if (LS.Shift)
14174         ++Shift;
14175       // If this slice can merge a cross register bank copy, account for it.
14176       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14177         ++CrossRegisterBanksCopies;
14178     }
14179
14180     Cost &operator+=(const Cost &RHS) {
14181       Loads += RHS.Loads;
14182       Truncates += RHS.Truncates;
14183       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14184       ZExts += RHS.ZExts;
14185       Shift += RHS.Shift;
14186       return *this;
14187     }
14188
14189     bool operator==(const Cost &RHS) const {
14190       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14191              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14192              ZExts == RHS.ZExts && Shift == RHS.Shift;
14193     }
14194
14195     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14196
14197     bool operator<(const Cost &RHS) const {
14198       // Assume cross register banks copies are as expensive as loads.
14199       // FIXME: Do we want some more target hooks?
14200       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14201       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14202       // Unless we are optimizing for code size, consider the
14203       // expensive operation first.
14204       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14205         return ExpensiveOpsLHS < ExpensiveOpsRHS;
14206       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14207              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14208     }
14209
14210     bool operator>(const Cost &RHS) const { return RHS < *this; }
14211
14212     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14213
14214     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14215   };
14216
14217   // The last instruction that represent the slice. This should be a
14218   // truncate instruction.
14219   SDNode *Inst;
14220
14221   // The original load instruction.
14222   LoadSDNode *Origin;
14223
14224   // The right shift amount in bits from the original load.
14225   unsigned Shift;
14226
14227   // The DAG from which Origin came from.
14228   // This is used to get some contextual information about legal types, etc.
14229   SelectionDAG *DAG;
14230
14231   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14232               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14233       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14234
14235   /// Get the bits used in a chunk of bits \p BitWidth large.
14236   /// \return Result is \p BitWidth and has used bits set to 1 and
14237   ///         not used bits set to 0.
14238   APInt getUsedBits() const {
14239     // Reproduce the trunc(lshr) sequence:
14240     // - Start from the truncated value.
14241     // - Zero extend to the desired bit width.
14242     // - Shift left.
14243     assert(Origin && "No original load to compare against.");
14244     unsigned BitWidth = Origin->getValueSizeInBits(0);
14245     assert(Inst && "This slice is not bound to an instruction");
14246     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14247            "Extracted slice is bigger than the whole type!");
14248     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14249     UsedBits.setAllBits();
14250     UsedBits = UsedBits.zext(BitWidth);
14251     UsedBits <<= Shift;
14252     return UsedBits;
14253   }
14254
14255   /// Get the size of the slice to be loaded in bytes.
14256   unsigned getLoadedSize() const {
14257     unsigned SliceSize = getUsedBits().countPopulation();
14258     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14259     return SliceSize / 8;
14260   }
14261
14262   /// Get the type that will be loaded for this slice.
14263   /// Note: This may not be the final type for the slice.
14264   EVT getLoadedType() const {
14265     assert(DAG && "Missing context");
14266     LLVMContext &Ctxt = *DAG->getContext();
14267     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14268   }
14269
14270   /// Get the alignment of the load used for this slice.
14271   unsigned getAlignment() const {
14272     unsigned Alignment = Origin->getAlignment();
14273     uint64_t Offset = getOffsetFromBase();
14274     if (Offset != 0)
14275       Alignment = MinAlign(Alignment, Alignment + Offset);
14276     return Alignment;
14277   }
14278
14279   /// Check if this slice can be rewritten with legal operations.
14280   bool isLegal() const {
14281     // An invalid slice is not legal.
14282     if (!Origin || !Inst || !DAG)
14283       return false;
14284
14285     // Offsets are for indexed load only, we do not handle that.
14286     if (!Origin->getOffset().isUndef())
14287       return false;
14288
14289     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14290
14291     // Check that the type is legal.
14292     EVT SliceType = getLoadedType();
14293     if (!TLI.isTypeLegal(SliceType))
14294       return false;
14295
14296     // Check that the load is legal for this type.
14297     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14298       return false;
14299
14300     // Check that the offset can be computed.
14301     // 1. Check its type.
14302     EVT PtrType = Origin->getBasePtr().getValueType();
14303     if (PtrType == MVT::Untyped || PtrType.isExtended())
14304       return false;
14305
14306     // 2. Check that it fits in the immediate.
14307     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14308       return false;
14309
14310     // 3. Check that the computation is legal.
14311     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14312       return false;
14313
14314     // Check that the zext is legal if it needs one.
14315     EVT TruncateType = Inst->getValueType(0);
14316     if (TruncateType != SliceType &&
14317         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14318       return false;
14319
14320     return true;
14321   }
14322
14323   /// Get the offset in bytes of this slice in the original chunk of
14324   /// bits.
14325   /// \pre DAG != nullptr.
14326   uint64_t getOffsetFromBase() const {
14327     assert(DAG && "Missing context.");
14328     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14329     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14330     uint64_t Offset = Shift / 8;
14331     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14332     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14333            "The size of the original loaded type is not a multiple of a"
14334            " byte.");
14335     // If Offset is bigger than TySizeInBytes, it means we are loading all
14336     // zeros. This should have been optimized before in the process.
14337     assert(TySizeInBytes > Offset &&
14338            "Invalid shift amount for given loaded size");
14339     if (IsBigEndian)
14340       Offset = TySizeInBytes - Offset - getLoadedSize();
14341     return Offset;
14342   }
14343
14344   /// Generate the sequence of instructions to load the slice
14345   /// represented by this object and redirect the uses of this slice to
14346   /// this new sequence of instructions.
14347   /// \pre this->Inst && this->Origin are valid Instructions and this
14348   /// object passed the legal check: LoadedSlice::isLegal returned true.
14349   /// \return The last instruction of the sequence used to load the slice.
14350   SDValue loadSlice() const {
14351     assert(Inst && Origin && "Unable to replace a non-existing slice.");
14352     const SDValue &OldBaseAddr = Origin->getBasePtr();
14353     SDValue BaseAddr = OldBaseAddr;
14354     // Get the offset in that chunk of bytes w.r.t. the endianness.
14355     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14356     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14357     if (Offset) {
14358       // BaseAddr = BaseAddr + Offset.
14359       EVT ArithType = BaseAddr.getValueType();
14360       SDLoc DL(Origin);
14361       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14362                               DAG->getConstant(Offset, DL, ArithType));
14363     }
14364
14365     // Create the type of the loaded slice according to its size.
14366     EVT SliceType = getLoadedType();
14367
14368     // Create the load for the slice.
14369     SDValue LastInst =
14370         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14371                      Origin->getPointerInfo().getWithOffset(Offset),
14372                      getAlignment(), Origin->getMemOperand()->getFlags());
14373     // If the final type is not the same as the loaded type, this means that
14374     // we have to pad with zero. Create a zero extend for that.
14375     EVT FinalType = Inst->getValueType(0);
14376     if (SliceType != FinalType)
14377       LastInst =
14378           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14379     return LastInst;
14380   }
14381
14382   /// Check if this slice can be merged with an expensive cross register
14383   /// bank copy. E.g.,
14384   /// i = load i32
14385   /// f = bitcast i32 i to float
14386   bool canMergeExpensiveCrossRegisterBankCopy() const {
14387     if (!Inst || !Inst->hasOneUse())
14388       return false;
14389     SDNode *Use = *Inst->use_begin();
14390     if (Use->getOpcode() != ISD::BITCAST)
14391       return false;
14392     assert(DAG && "Missing context");
14393     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14394     EVT ResVT = Use->getValueType(0);
14395     const TargetRegisterClass *ResRC =
14396         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14397     const TargetRegisterClass *ArgRC =
14398         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14399                            Use->getOperand(0)->isDivergent());
14400     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14401       return false;
14402
14403     // At this point, we know that we perform a cross-register-bank copy.
14404     // Check if it is expensive.
14405     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14406     // Assume bitcasts are cheap, unless both register classes do not
14407     // explicitly share a common sub class.
14408     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14409       return false;
14410
14411     // Check if it will be merged with the load.
14412     // 1. Check the alignment constraint.
14413     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14414         ResVT.getTypeForEVT(*DAG->getContext()));
14415
14416     if (RequiredAlignment > getAlignment())
14417       return false;
14418
14419     // 2. Check that the load is a legal operation for that type.
14420     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14421       return false;
14422
14423     // 3. Check that we do not have a zext in the way.
14424     if (Inst->getValueType(0) != getLoadedType())
14425       return false;
14426
14427     return true;
14428   }
14429 };
14430
14431 } // end anonymous namespace
14432
14433 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14434 /// \p UsedBits looks like 0..0 1..1 0..0.
14435 static bool areUsedBitsDense(const APInt &UsedBits) {
14436   // If all the bits are one, this is dense!
14437   if (UsedBits.isAllOnesValue())
14438     return true;
14439
14440   // Get rid of the unused bits on the right.
14441   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14442   // Get rid of the unused bits on the left.
14443   if (NarrowedUsedBits.countLeadingZeros())
14444     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14445   // Check that the chunk of bits is completely used.
14446   return NarrowedUsedBits.isAllOnesValue();
14447 }
14448
14449 /// Check whether or not \p First and \p Second are next to each other
14450 /// in memory. This means that there is no hole between the bits loaded
14451 /// by \p First and the bits loaded by \p Second.
14452 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14453                                      const LoadedSlice &Second) {
14454   assert(First.Origin == Second.Origin && First.Origin &&
14455          "Unable to match different memory origins.");
14456   APInt UsedBits = First.getUsedBits();
14457   assert((UsedBits & Second.getUsedBits()) == 0 &&
14458          "Slices are not supposed to overlap.");
14459   UsedBits |= Second.getUsedBits();
14460   return areUsedBitsDense(UsedBits);
14461 }
14462
14463 /// Adjust the \p GlobalLSCost according to the target
14464 /// paring capabilities and the layout of the slices.
14465 /// \pre \p GlobalLSCost should account for at least as many loads as
14466 /// there is in the slices in \p LoadedSlices.
14467 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14468                                  LoadedSlice::Cost &GlobalLSCost) {
14469   unsigned NumberOfSlices = LoadedSlices.size();
14470   // If there is less than 2 elements, no pairing is possible.
14471   if (NumberOfSlices < 2)
14472     return;
14473
14474   // Sort the slices so that elements that are likely to be next to each
14475   // other in memory are next to each other in the list.
14476   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14477     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14478     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14479   });
14480   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14481   // First (resp. Second) is the first (resp. Second) potentially candidate
14482   // to be placed in a paired load.
14483   const LoadedSlice *First = nullptr;
14484   const LoadedSlice *Second = nullptr;
14485   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14486                 // Set the beginning of the pair.
14487                                                            First = Second) {
14488     Second = &LoadedSlices[CurrSlice];
14489
14490     // If First is NULL, it means we start a new pair.
14491     // Get to the next slice.
14492     if (!First)
14493       continue;
14494
14495     EVT LoadedType = First->getLoadedType();
14496
14497     // If the types of the slices are different, we cannot pair them.
14498     if (LoadedType != Second->getLoadedType())
14499       continue;
14500
14501     // Check if the target supplies paired loads for this type.
14502     unsigned RequiredAlignment = 0;
14503     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14504       // move to the next pair, this type is hopeless.
14505       Second = nullptr;
14506       continue;
14507     }
14508     // Check if we meet the alignment requirement.
14509     if (RequiredAlignment > First->getAlignment())
14510       continue;
14511
14512     // Check that both loads are next to each other in memory.
14513     if (!areSlicesNextToEachOther(*First, *Second))
14514       continue;
14515
14516     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14517     --GlobalLSCost.Loads;
14518     // Move to the next pair.
14519     Second = nullptr;
14520   }
14521 }
14522
14523 /// Check the profitability of all involved LoadedSlice.
14524 /// Currently, it is considered profitable if there is exactly two
14525 /// involved slices (1) which are (2) next to each other in memory, and
14526 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14527 ///
14528 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14529 /// the elements themselves.
14530 ///
14531 /// FIXME: When the cost model will be mature enough, we can relax
14532 /// constraints (1) and (2).
14533 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14534                                 const APInt &UsedBits, bool ForCodeSize) {
14535   unsigned NumberOfSlices = LoadedSlices.size();
14536   if (StressLoadSlicing)
14537     return NumberOfSlices > 1;
14538
14539   // Check (1).
14540   if (NumberOfSlices != 2)
14541     return false;
14542
14543   // Check (2).
14544   if (!areUsedBitsDense(UsedBits))
14545     return false;
14546
14547   // Check (3).
14548   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14549   // The original code has one big load.
14550   OrigCost.Loads = 1;
14551   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14552     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14553     // Accumulate the cost of all the slices.
14554     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14555     GlobalSlicingCost += SliceCost;
14556
14557     // Account as cost in the original configuration the gain obtained
14558     // with the current slices.
14559     OrigCost.addSliceGain(LS);
14560   }
14561
14562   // If the target supports paired load, adjust the cost accordingly.
14563   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14564   return OrigCost > GlobalSlicingCost;
14565 }
14566
14567 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14568 /// operations, split it in the various pieces being extracted.
14569 ///
14570 /// This sort of thing is introduced by SROA.
14571 /// This slicing takes care not to insert overlapping loads.
14572 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14573 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14574   if (Level < AfterLegalizeDAG)
14575     return false;
14576
14577   LoadSDNode *LD = cast<LoadSDNode>(N);
14578   if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
14579       !LD->getValueType(0).isInteger())
14580     return false;
14581
14582   // Keep track of already used bits to detect overlapping values.
14583   // In that case, we will just abort the transformation.
14584   APInt UsedBits(LD->getValueSizeInBits(0), 0);
14585
14586   SmallVector<LoadedSlice, 4> LoadedSlices;
14587
14588   // Check if this load is used as several smaller chunks of bits.
14589   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14590   // of computation for each trunc.
14591   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14592        UI != UIEnd; ++UI) {
14593     // Skip the uses of the chain.
14594     if (UI.getUse().getResNo() != 0)
14595       continue;
14596
14597     SDNode *User = *UI;
14598     unsigned Shift = 0;
14599
14600     // Check if this is a trunc(lshr).
14601     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14602         isa<ConstantSDNode>(User->getOperand(1))) {
14603       Shift = User->getConstantOperandVal(1);
14604       User = *User->use_begin();
14605     }
14606
14607     // At this point, User is a Truncate, iff we encountered, trunc or
14608     // trunc(lshr).
14609     if (User->getOpcode() != ISD::TRUNCATE)
14610       return false;
14611
14612     // The width of the type must be a power of 2 and greater than 8-bits.
14613     // Otherwise the load cannot be represented in LLVM IR.
14614     // Moreover, if we shifted with a non-8-bits multiple, the slice
14615     // will be across several bytes. We do not support that.
14616     unsigned Width = User->getValueSizeInBits(0);
14617     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14618       return false;
14619
14620     // Build the slice for this chain of computations.
14621     LoadedSlice LS(User, LD, Shift, &DAG);
14622     APInt CurrentUsedBits = LS.getUsedBits();
14623
14624     // Check if this slice overlaps with another.
14625     if ((CurrentUsedBits & UsedBits) != 0)
14626       return false;
14627     // Update the bits used globally.
14628     UsedBits |= CurrentUsedBits;
14629
14630     // Check if the new slice would be legal.
14631     if (!LS.isLegal())
14632       return false;
14633
14634     // Record the slice.
14635     LoadedSlices.push_back(LS);
14636   }
14637
14638   // Abort slicing if it does not seem to be profitable.
14639   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14640     return false;
14641
14642   ++SlicedLoads;
14643
14644   // Rewrite each chain to use an independent load.
14645   // By construction, each chain can be represented by a unique load.
14646
14647   // Prepare the argument for the new token factor for all the slices.
14648   SmallVector<SDValue, 8> ArgChains;
14649   for (SmallVectorImpl<LoadedSlice>::const_iterator
14650            LSIt = LoadedSlices.begin(),
14651            LSItEnd = LoadedSlices.end();
14652        LSIt != LSItEnd; ++LSIt) {
14653     SDValue SliceInst = LSIt->loadSlice();
14654     CombineTo(LSIt->Inst, SliceInst, true);
14655     if (SliceInst.getOpcode() != ISD::LOAD)
14656       SliceInst = SliceInst.getOperand(0);
14657     assert(SliceInst->getOpcode() == ISD::LOAD &&
14658            "It takes more than a zext to get to the loaded slice!!");
14659     ArgChains.push_back(SliceInst.getValue(1));
14660   }
14661
14662   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14663                               ArgChains);
14664   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14665   AddToWorklist(Chain.getNode());
14666   return true;
14667 }
14668
14669 /// Check to see if V is (and load (ptr), imm), where the load is having
14670 /// specific bytes cleared out.  If so, return the byte size being masked out
14671 /// and the shift amount.
14672 static std::pair<unsigned, unsigned>
14673 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14674   std::pair<unsigned, unsigned> Result(0, 0);
14675
14676   // Check for the structure we're looking for.
14677   if (V->getOpcode() != ISD::AND ||
14678       !isa<ConstantSDNode>(V->getOperand(1)) ||
14679       !ISD::isNormalLoad(V->getOperand(0).getNode()))
14680     return Result;
14681
14682   // Check the chain and pointer.
14683   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14684   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
14685
14686   // This only handles simple types.
14687   if (V.getValueType() != MVT::i16 &&
14688       V.getValueType() != MVT::i32 &&
14689       V.getValueType() != MVT::i64)
14690     return Result;
14691
14692   // Check the constant mask.  Invert it so that the bits being masked out are
14693   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
14694   // follow the sign bit for uniformity.
14695   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14696   unsigned NotMaskLZ = countLeadingZeros(NotMask);
14697   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
14698   unsigned NotMaskTZ = countTrailingZeros(NotMask);
14699   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
14700   if (NotMaskLZ == 64) return Result;  // All zero mask.
14701
14702   // See if we have a continuous run of bits.  If so, we have 0*1+0*
14703   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14704     return Result;
14705
14706   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14707   if (V.getValueType() != MVT::i64 && NotMaskLZ)
14708     NotMaskLZ -= 64-V.getValueSizeInBits();
14709
14710   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14711   switch (MaskedBytes) {
14712   case 1:
14713   case 2:
14714   case 4: break;
14715   default: return Result; // All one mask, or 5-byte mask.
14716   }
14717
14718   // Verify that the first bit starts at a multiple of mask so that the access
14719   // is aligned the same as the access width.
14720   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14721
14722   // For narrowing to be valid, it must be the case that the load the
14723   // immediately preceding memory operation before the store.
14724   if (LD == Chain.getNode())
14725     ; // ok.
14726   else if (Chain->getOpcode() == ISD::TokenFactor &&
14727            SDValue(LD, 1).hasOneUse()) {
14728     // LD has only 1 chain use so they are no indirect dependencies.
14729     if (!LD->isOperandOf(Chain.getNode()))
14730       return Result;
14731   } else
14732     return Result; // Fail.
14733
14734   Result.first = MaskedBytes;
14735   Result.second = NotMaskTZ/8;
14736   return Result;
14737 }
14738
14739 /// Check to see if IVal is something that provides a value as specified by
14740 /// MaskInfo. If so, replace the specified store with a narrower store of
14741 /// truncated IVal.
14742 static SDValue
14743 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14744                                 SDValue IVal, StoreSDNode *St,
14745                                 DAGCombiner *DC) {
14746   unsigned NumBytes = MaskInfo.first;
14747   unsigned ByteShift = MaskInfo.second;
14748   SelectionDAG &DAG = DC->getDAG();
14749
14750   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14751   // that uses this.  If not, this is not a replacement.
14752   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14753                                   ByteShift*8, (ByteShift+NumBytes)*8);
14754   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
14755
14756   // Check that it is legal on the target to do this.  It is legal if the new
14757   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14758   // legalization (and the target doesn't explicitly think this is a bad idea).
14759   MVT VT = MVT::getIntegerVT(NumBytes * 8);
14760   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14761   if (!DC->isTypeLegal(VT))
14762     return SDValue();
14763   if (St->getMemOperand() &&
14764       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
14765                               *St->getMemOperand()))
14766     return SDValue();
14767
14768   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14769   // shifted by ByteShift and truncated down to NumBytes.
14770   if (ByteShift) {
14771     SDLoc DL(IVal);
14772     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14773                        DAG.getConstant(ByteShift*8, DL,
14774                                     DC->getShiftAmountTy(IVal.getValueType())));
14775   }
14776
14777   // Figure out the offset for the store and the alignment of the access.
14778   unsigned StOffset;
14779   unsigned NewAlign = St->getAlignment();
14780
14781   if (DAG.getDataLayout().isLittleEndian())
14782     StOffset = ByteShift;
14783   else
14784     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14785
14786   SDValue Ptr = St->getBasePtr();
14787   if (StOffset) {
14788     SDLoc DL(IVal);
14789     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14790                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14791     NewAlign = MinAlign(NewAlign, StOffset);
14792   }
14793
14794   // Truncate down to the new size.
14795   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14796
14797   ++OpsNarrowed;
14798   return DAG
14799       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14800                 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
14801 }
14802
14803 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14804 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14805 /// narrowing the load and store if it would end up being a win for performance
14806 /// or code size.
14807 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14808   StoreSDNode *ST  = cast<StoreSDNode>(N);
14809   if (!ST->isSimple())
14810     return SDValue();
14811
14812   SDValue Chain = ST->getChain();
14813   SDValue Value = ST->getValue();
14814   SDValue Ptr   = ST->getBasePtr();
14815   EVT VT = Value.getValueType();
14816
14817   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14818     return SDValue();
14819
14820   unsigned Opc = Value.getOpcode();
14821
14822   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14823   // is a byte mask indicating a consecutive number of bytes, check to see if
14824   // Y is known to provide just those bytes.  If so, we try to replace the
14825   // load + replace + store sequence with a single (narrower) store, which makes
14826   // the load dead.
14827   if (Opc == ISD::OR) {
14828     std::pair<unsigned, unsigned> MaskedLoad;
14829     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14830     if (MaskedLoad.first)
14831       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14832                                                   Value.getOperand(1), ST,this))
14833         return NewST;
14834
14835     // Or is commutative, so try swapping X and Y.
14836     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14837     if (MaskedLoad.first)
14838       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14839                                                   Value.getOperand(0), ST,this))
14840         return NewST;
14841   }
14842
14843   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14844       Value.getOperand(1).getOpcode() != ISD::Constant)
14845     return SDValue();
14846
14847   SDValue N0 = Value.getOperand(0);
14848   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14849       Chain == SDValue(N0.getNode(), 1)) {
14850     LoadSDNode *LD = cast<LoadSDNode>(N0);
14851     if (LD->getBasePtr() != Ptr ||
14852         LD->getPointerInfo().getAddrSpace() !=
14853         ST->getPointerInfo().getAddrSpace())
14854       return SDValue();
14855
14856     // Find the type to narrow it the load / op / store to.
14857     SDValue N1 = Value.getOperand(1);
14858     unsigned BitWidth = N1.getValueSizeInBits();
14859     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14860     if (Opc == ISD::AND)
14861       Imm ^= APInt::getAllOnesValue(BitWidth);
14862     if (Imm == 0 || Imm.isAllOnesValue())
14863       return SDValue();
14864     unsigned ShAmt = Imm.countTrailingZeros();
14865     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14866     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14867     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14868     // The narrowing should be profitable, the load/store operation should be
14869     // legal (or custom) and the store size should be equal to the NewVT width.
14870     while (NewBW < BitWidth &&
14871            (NewVT.getStoreSizeInBits() != NewBW ||
14872             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14873             !TLI.isNarrowingProfitable(VT, NewVT))) {
14874       NewBW = NextPowerOf2(NewBW);
14875       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14876     }
14877     if (NewBW >= BitWidth)
14878       return SDValue();
14879
14880     // If the lsb changed does not start at the type bitwidth boundary,
14881     // start at the previous one.
14882     if (ShAmt % NewBW)
14883       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
14884     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
14885                                    std::min(BitWidth, ShAmt + NewBW));
14886     if ((Imm & Mask) == Imm) {
14887       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
14888       if (Opc == ISD::AND)
14889         NewImm ^= APInt::getAllOnesValue(NewBW);
14890       uint64_t PtrOff = ShAmt / 8;
14891       // For big endian targets, we need to adjust the offset to the pointer to
14892       // load the correct bytes.
14893       if (DAG.getDataLayout().isBigEndian())
14894         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
14895
14896       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
14897       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
14898       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
14899         return SDValue();
14900
14901       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
14902                                    Ptr.getValueType(), Ptr,
14903                                    DAG.getConstant(PtrOff, SDLoc(LD),
14904                                                    Ptr.getValueType()));
14905       SDValue NewLD =
14906           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
14907                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
14908                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
14909       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
14910                                    DAG.getConstant(NewImm, SDLoc(Value),
14911                                                    NewVT));
14912       SDValue NewST =
14913           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
14914                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
14915
14916       AddToWorklist(NewPtr.getNode());
14917       AddToWorklist(NewLD.getNode());
14918       AddToWorklist(NewVal.getNode());
14919       WorklistRemover DeadNodes(*this);
14920       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
14921       ++OpsNarrowed;
14922       return NewST;
14923     }
14924   }
14925
14926   return SDValue();
14927 }
14928
14929 /// For a given floating point load / store pair, if the load value isn't used
14930 /// by any other operations, then consider transforming the pair to integer
14931 /// load / store operations if the target deems the transformation profitable.
14932 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
14933   StoreSDNode *ST  = cast<StoreSDNode>(N);
14934   SDValue Value = ST->getValue();
14935   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
14936       Value.hasOneUse()) {
14937     LoadSDNode *LD = cast<LoadSDNode>(Value);
14938     EVT VT = LD->getMemoryVT();
14939     if (!VT.isFloatingPoint() ||
14940         VT != ST->getMemoryVT() ||
14941         LD->isNonTemporal() ||
14942         ST->isNonTemporal() ||
14943         LD->getPointerInfo().getAddrSpace() != 0 ||
14944         ST->getPointerInfo().getAddrSpace() != 0)
14945       return SDValue();
14946
14947     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
14948     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
14949         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
14950         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
14951         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
14952       return SDValue();
14953
14954     unsigned LDAlign = LD->getAlignment();
14955     unsigned STAlign = ST->getAlignment();
14956     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
14957     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
14958     if (LDAlign < ABIAlign || STAlign < ABIAlign)
14959       return SDValue();
14960
14961     SDValue NewLD =
14962         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
14963                     LD->getPointerInfo(), LDAlign);
14964
14965     SDValue NewST =
14966         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
14967                      ST->getPointerInfo(), STAlign);
14968
14969     AddToWorklist(NewLD.getNode());
14970     AddToWorklist(NewST.getNode());
14971     WorklistRemover DeadNodes(*this);
14972     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
14973     ++LdStFP2Int;
14974     return NewST;
14975   }
14976
14977   return SDValue();
14978 }
14979
14980 // This is a helper function for visitMUL to check the profitability
14981 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
14982 // MulNode is the original multiply, AddNode is (add x, c1),
14983 // and ConstNode is c2.
14984 //
14985 // If the (add x, c1) has multiple uses, we could increase
14986 // the number of adds if we make this transformation.
14987 // It would only be worth doing this if we can remove a
14988 // multiply in the process. Check for that here.
14989 // To illustrate:
14990 //     (A + c1) * c3
14991 //     (A + c2) * c3
14992 // We're checking for cases where we have common "c3 * A" expressions.
14993 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
14994                                               SDValue &AddNode,
14995                                               SDValue &ConstNode) {
14996   APInt Val;
14997
14998   // If the add only has one use, this would be OK to do.
14999   if (AddNode.getNode()->hasOneUse())
15000     return true;
15001
15002   // Walk all the users of the constant with which we're multiplying.
15003   for (SDNode *Use : ConstNode->uses()) {
15004     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15005       continue;
15006
15007     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15008       SDNode *OtherOp;
15009       SDNode *MulVar = AddNode.getOperand(0).getNode();
15010
15011       // OtherOp is what we're multiplying against the constant.
15012       if (Use->getOperand(0) == ConstNode)
15013         OtherOp = Use->getOperand(1).getNode();
15014       else
15015         OtherOp = Use->getOperand(0).getNode();
15016
15017       // Check to see if multiply is with the same operand of our "add".
15018       //
15019       //     ConstNode  = CONST
15020       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15021       //     ...
15022       //     AddNode  = (A + c1)  <-- MulVar is A.
15023       //         = AddNode * ConstNode   <-- current visiting instruction.
15024       //
15025       // If we make this transformation, we will have a common
15026       // multiply (ConstNode * A) that we can save.
15027       if (OtherOp == MulVar)
15028         return true;
15029
15030       // Now check to see if a future expansion will give us a common
15031       // multiply.
15032       //
15033       //     ConstNode  = CONST
15034       //     AddNode    = (A + c1)
15035       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15036       //     ...
15037       //     OtherOp = (A + c2)
15038       //     Use     = OtherOp * ConstNode <-- visiting Use.
15039       //
15040       // If we make this transformation, we will have a common
15041       // multiply (CONST * A) after we also do the same transformation
15042       // to the "t2" instruction.
15043       if (OtherOp->getOpcode() == ISD::ADD &&
15044           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15045           OtherOp->getOperand(0).getNode() == MulVar)
15046         return true;
15047     }
15048   }
15049
15050   // Didn't find a case where this would be profitable.
15051   return false;
15052 }
15053
15054 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15055                                          unsigned NumStores) {
15056   SmallVector<SDValue, 8> Chains;
15057   SmallPtrSet<const SDNode *, 8> Visited;
15058   SDLoc StoreDL(StoreNodes[0].MemNode);
15059
15060   for (unsigned i = 0; i < NumStores; ++i) {
15061     Visited.insert(StoreNodes[i].MemNode);
15062   }
15063
15064   // don't include nodes that are children or repeated nodes.
15065   for (unsigned i = 0; i < NumStores; ++i) {
15066     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15067       Chains.push_back(StoreNodes[i].MemNode->getChain());
15068   }
15069
15070   assert(Chains.size() > 0 && "Chain should have generated a chain");
15071   return DAG.getTokenFactor(StoreDL, Chains);
15072 }
15073
15074 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
15075     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15076     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15077   // Make sure we have something to merge.
15078   if (NumStores < 2)
15079     return false;
15080
15081   // The latest Node in the DAG.
15082   SDLoc DL(StoreNodes[0].MemNode);
15083
15084   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
15085   unsigned SizeInBits = NumStores * ElementSizeBits;
15086   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15087
15088   EVT StoreTy;
15089   if (UseVector) {
15090     unsigned Elts = NumStores * NumMemElts;
15091     // Get the type for the merged vector store.
15092     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15093   } else
15094     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15095
15096   SDValue StoredVal;
15097   if (UseVector) {
15098     if (IsConstantSrc) {
15099       SmallVector<SDValue, 8> BuildVector;
15100       for (unsigned I = 0; I != NumStores; ++I) {
15101         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15102         SDValue Val = St->getValue();
15103         // If constant is of the wrong type, convert it now.
15104         if (MemVT != Val.getValueType()) {
15105           Val = peekThroughBitcasts(Val);
15106           // Deal with constants of wrong size.
15107           if (ElementSizeBits != Val.getValueSizeInBits()) {
15108             EVT IntMemVT =
15109                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15110             if (isa<ConstantFPSDNode>(Val)) {
15111               // Not clear how to truncate FP values.
15112               return false;
15113             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15114               Val = DAG.getConstant(C->getAPIntValue()
15115                                         .zextOrTrunc(Val.getValueSizeInBits())
15116                                         .zextOrTrunc(ElementSizeBits),
15117                                     SDLoc(C), IntMemVT);
15118           }
15119           // Make sure correctly size type is the correct type.
15120           Val = DAG.getBitcast(MemVT, Val);
15121         }
15122         BuildVector.push_back(Val);
15123       }
15124       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15125                                                : ISD::BUILD_VECTOR,
15126                               DL, StoreTy, BuildVector);
15127     } else {
15128       SmallVector<SDValue, 8> Ops;
15129       for (unsigned i = 0; i < NumStores; ++i) {
15130         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15131         SDValue Val = peekThroughBitcasts(St->getValue());
15132         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15133         // type MemVT. If the underlying value is not the correct
15134         // type, but it is an extraction of an appropriate vector we
15135         // can recast Val to be of the correct type. This may require
15136         // converting between EXTRACT_VECTOR_ELT and
15137         // EXTRACT_SUBVECTOR.
15138         if ((MemVT != Val.getValueType()) &&
15139             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15140              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15141           EVT MemVTScalarTy = MemVT.getScalarType();
15142           // We may need to add a bitcast here to get types to line up.
15143           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15144             Val = DAG.getBitcast(MemVT, Val);
15145           } else {
15146             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15147                                             : ISD::EXTRACT_VECTOR_ELT;
15148             SDValue Vec = Val.getOperand(0);
15149             SDValue Idx = Val.getOperand(1);
15150             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15151           }
15152         }
15153         Ops.push_back(Val);
15154       }
15155
15156       // Build the extracted vector elements back into a vector.
15157       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15158                                                : ISD::BUILD_VECTOR,
15159                               DL, StoreTy, Ops);
15160     }
15161   } else {
15162     // We should always use a vector store when merging extracted vector
15163     // elements, so this path implies a store of constants.
15164     assert(IsConstantSrc && "Merged vector elements should use vector store");
15165
15166     APInt StoreInt(SizeInBits, 0);
15167
15168     // Construct a single integer constant which is made of the smaller
15169     // constant inputs.
15170     bool IsLE = DAG.getDataLayout().isLittleEndian();
15171     for (unsigned i = 0; i < NumStores; ++i) {
15172       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15173       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15174
15175       SDValue Val = St->getValue();
15176       Val = peekThroughBitcasts(Val);
15177       StoreInt <<= ElementSizeBits;
15178       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15179         StoreInt |= C->getAPIntValue()
15180                         .zextOrTrunc(ElementSizeBits)
15181                         .zextOrTrunc(SizeInBits);
15182       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15183         StoreInt |= C->getValueAPF()
15184                         .bitcastToAPInt()
15185                         .zextOrTrunc(ElementSizeBits)
15186                         .zextOrTrunc(SizeInBits);
15187         // If fp truncation is necessary give up for now.
15188         if (MemVT.getSizeInBits() != ElementSizeBits)
15189           return false;
15190       } else {
15191         llvm_unreachable("Invalid constant element type");
15192       }
15193     }
15194
15195     // Create the new Load and Store operations.
15196     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15197   }
15198
15199   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15200   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15201
15202   // make sure we use trunc store if it's necessary to be legal.
15203   SDValue NewStore;
15204   if (!UseTrunc) {
15205     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15206                             FirstInChain->getPointerInfo(),
15207                             FirstInChain->getAlignment());
15208   } else { // Must be realized as a trunc store
15209     EVT LegalizedStoredValTy =
15210         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15211     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15212     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15213     SDValue ExtendedStoreVal =
15214         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15215                         LegalizedStoredValTy);
15216     NewStore = DAG.getTruncStore(
15217         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15218         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15219         FirstInChain->getAlignment(),
15220         FirstInChain->getMemOperand()->getFlags());
15221   }
15222
15223   // Replace all merged stores with the new store.
15224   for (unsigned i = 0; i < NumStores; ++i)
15225     CombineTo(StoreNodes[i].MemNode, NewStore);
15226
15227   AddToWorklist(NewChain.getNode());
15228   return true;
15229 }
15230
15231 void DAGCombiner::getStoreMergeCandidates(
15232     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15233     SDNode *&RootNode) {
15234   // This holds the base pointer, index, and the offset in bytes from the base
15235   // pointer.
15236   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
15237   EVT MemVT = St->getMemoryVT();
15238
15239   SDValue Val = peekThroughBitcasts(St->getValue());
15240   // We must have a base and an offset.
15241   if (!BasePtr.getBase().getNode())
15242     return;
15243
15244   // Do not handle stores to undef base pointers.
15245   if (BasePtr.getBase().isUndef())
15246     return;
15247
15248   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15249   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15250                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15251   bool IsLoadSrc = isa<LoadSDNode>(Val);
15252   BaseIndexOffset LBasePtr;
15253   // Match on loadbaseptr if relevant.
15254   EVT LoadVT;
15255   if (IsLoadSrc) {
15256     auto *Ld = cast<LoadSDNode>(Val);
15257     LBasePtr = BaseIndexOffset::match(Ld, DAG);
15258     LoadVT = Ld->getMemoryVT();
15259     // Load and store should be the same type.
15260     if (MemVT != LoadVT)
15261       return;
15262     // Loads must only have one use.
15263     if (!Ld->hasNUsesOfValue(1, 0))
15264       return;
15265     // The memory operands must not be volatile/indexed/atomic.
15266     // TODO: May be able to relax for unordered atomics (see D66309)
15267     if (!Ld->isSimple() || Ld->isIndexed())
15268       return;
15269   }
15270   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15271                             int64_t &Offset) -> bool {
15272     // The memory operands must not be volatile/indexed/atomic.
15273     // TODO: May be able to relax for unordered atomics (see D66309)
15274     if (!Other->isSimple() ||  Other->isIndexed())
15275       return false;
15276     // Don't mix temporal stores with non-temporal stores.
15277     if (St->isNonTemporal() != Other->isNonTemporal())
15278       return false;
15279     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15280     // Allow merging constants of different types as integers.
15281     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15282                                            : Other->getMemoryVT() != MemVT;
15283     if (IsLoadSrc) {
15284       if (NoTypeMatch)
15285         return false;
15286       // The Load's Base Ptr must also match
15287       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15288         BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15289         if (LoadVT != OtherLd->getMemoryVT())
15290           return false;
15291         // Loads must only have one use.
15292         if (!OtherLd->hasNUsesOfValue(1, 0))
15293           return false;
15294         // The memory operands must not be volatile/indexed/atomic.
15295         // TODO: May be able to relax for unordered atomics (see D66309)
15296         if (!OtherLd->isSimple() ||
15297             OtherLd->isIndexed())
15298           return false;
15299         // Don't mix temporal loads with non-temporal loads.
15300         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15301           return false;
15302         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15303           return false;
15304       } else
15305         return false;
15306     }
15307     if (IsConstantSrc) {
15308       if (NoTypeMatch)
15309         return false;
15310       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15311         return false;
15312     }
15313     if (IsExtractVecSrc) {
15314       // Do not merge truncated stores here.
15315       if (Other->isTruncatingStore())
15316         return false;
15317       if (!MemVT.bitsEq(OtherBC.getValueType()))
15318         return false;
15319       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15320           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15321         return false;
15322     }
15323     Ptr = BaseIndexOffset::match(Other, DAG);
15324     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15325   };
15326
15327   // Check if the pair of StoreNode and the RootNode already bail out many
15328   // times which is over the limit in dependence check.
15329   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
15330                                         SDNode *RootNode) -> bool {
15331     auto RootCount = StoreRootCountMap.find(StoreNode);
15332     if (RootCount != StoreRootCountMap.end() &&
15333         RootCount->second.first == RootNode &&
15334         RootCount->second.second > StoreMergeDependenceLimit)
15335       return true;
15336     return false;
15337   };
15338
15339   // We looking for a root node which is an ancestor to all mergable
15340   // stores. We search up through a load, to our root and then down
15341   // through all children. For instance we will find Store{1,2,3} if
15342   // St is Store1, Store2. or Store3 where the root is not a load
15343   // which always true for nonvolatile ops. TODO: Expand
15344   // the search to find all valid candidates through multiple layers of loads.
15345   //
15346   // Root
15347   // |-------|-------|
15348   // Load    Load    Store3
15349   // |       |
15350   // Store1   Store2
15351   //
15352   // FIXME: We should be able to climb and
15353   // descend TokenFactors to find candidates as well.
15354
15355   RootNode = St->getChain().getNode();
15356
15357   unsigned NumNodesExplored = 0;
15358   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15359     RootNode = Ldn->getChain().getNode();
15360     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15361          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15362       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15363         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15364           if (I2.getOperandNo() == 0)
15365             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15366               BaseIndexOffset Ptr;
15367               int64_t PtrDiff;
15368               if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15369                   !OverLimitInDependenceCheck(OtherST, RootNode))
15370                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15371             }
15372   } else
15373     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15374          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15375       if (I.getOperandNo() == 0)
15376         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15377           BaseIndexOffset Ptr;
15378           int64_t PtrDiff;
15379           if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15380               !OverLimitInDependenceCheck(OtherST, RootNode))
15381             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15382         }
15383 }
15384
15385 // We need to check that merging these stores does not cause a loop in
15386 // the DAG. Any store candidate may depend on another candidate
15387 // indirectly through its operand (we already consider dependencies
15388 // through the chain). Check in parallel by searching up from
15389 // non-chain operands of candidates.
15390 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15391     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15392     SDNode *RootNode) {
15393   // FIXME: We should be able to truncate a full search of
15394   // predecessors by doing a BFS and keeping tabs the originating
15395   // stores from which worklist nodes come from in a similar way to
15396   // TokenFactor simplfication.
15397
15398   SmallPtrSet<const SDNode *, 32> Visited;
15399   SmallVector<const SDNode *, 8> Worklist;
15400
15401   // RootNode is a predecessor to all candidates so we need not search
15402   // past it. Add RootNode (peeking through TokenFactors). Do not count
15403   // these towards size check.
15404
15405   Worklist.push_back(RootNode);
15406   while (!Worklist.empty()) {
15407     auto N = Worklist.pop_back_val();
15408     if (!Visited.insert(N).second)
15409       continue; // Already present in Visited.
15410     if (N->getOpcode() == ISD::TokenFactor) {
15411       for (SDValue Op : N->ops())
15412         Worklist.push_back(Op.getNode());
15413     }
15414   }
15415
15416   // Don't count pruning nodes towards max.
15417   unsigned int Max = 1024 + Visited.size();
15418   // Search Ops of store candidates.
15419   for (unsigned i = 0; i < NumStores; ++i) {
15420     SDNode *N = StoreNodes[i].MemNode;
15421     // Of the 4 Store Operands:
15422     //   * Chain (Op 0) -> We have already considered these
15423     //                    in candidate selection and can be
15424     //                    safely ignored
15425     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15426     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15427     //                       but aren't necessarily fromt the same base node, so
15428     //                       cycles possible (e.g. via indexed store).
15429     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15430     //               non-indexed stores). Not constant on all targets (e.g. ARM)
15431     //               and so can participate in a cycle.
15432     for (unsigned j = 1; j < N->getNumOperands(); ++j)
15433       Worklist.push_back(N->getOperand(j).getNode());
15434   }
15435   // Search through DAG. We can stop early if we find a store node.
15436   for (unsigned i = 0; i < NumStores; ++i)
15437     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15438                                      Max)) {
15439       // If the searching bail out, record the StoreNode and RootNode in the
15440       // StoreRootCountMap. If we have seen the pair many times over a limit,
15441       // we won't add the StoreNode into StoreNodes set again.
15442       if (Visited.size() >= Max) {
15443         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
15444         if (RootCount.first == RootNode)
15445           RootCount.second++;
15446         else
15447           RootCount = {RootNode, 1};
15448       }
15449       return false;
15450     }
15451   return true;
15452 }
15453
15454 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15455   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
15456     return false;
15457
15458   EVT MemVT = St->getMemoryVT();
15459   int64_t ElementSizeBytes = MemVT.getStoreSize();
15460   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15461
15462   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15463     return false;
15464
15465   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15466       Attribute::NoImplicitFloat);
15467
15468   // This function cannot currently deal with non-byte-sized memory sizes.
15469   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15470     return false;
15471
15472   if (!MemVT.isSimple())
15473     return false;
15474
15475   // Perform an early exit check. Do not bother looking at stored values that
15476   // are not constants, loads, or extracted vector elements.
15477   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15478   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15479   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15480                        isa<ConstantFPSDNode>(StoredVal);
15481   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15482                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15483   bool IsNonTemporalStore = St->isNonTemporal();
15484   bool IsNonTemporalLoad =
15485       IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15486
15487   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15488     return false;
15489
15490   SmallVector<MemOpLink, 8> StoreNodes;
15491   SDNode *RootNode;
15492   // Find potential store merge candidates by searching through chain sub-DAG
15493   getStoreMergeCandidates(St, StoreNodes, RootNode);
15494
15495   // Check if there is anything to merge.
15496   if (StoreNodes.size() < 2)
15497     return false;
15498
15499   // Sort the memory operands according to their distance from the
15500   // base pointer.
15501   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15502     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15503   });
15504
15505   // Store Merge attempts to merge the lowest stores. This generally
15506   // works out as if successful, as the remaining stores are checked
15507   // after the first collection of stores is merged. However, in the
15508   // case that a non-mergeable store is found first, e.g., {p[-2],
15509   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15510   // mergeable cases. To prevent this, we prune such stores from the
15511   // front of StoreNodes here.
15512
15513   bool RV = false;
15514   while (StoreNodes.size() > 1) {
15515     size_t StartIdx = 0;
15516     while ((StartIdx + 1 < StoreNodes.size()) &&
15517            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15518                StoreNodes[StartIdx + 1].OffsetFromBase)
15519       ++StartIdx;
15520
15521     // Bail if we don't have enough candidates to merge.
15522     if (StartIdx + 1 >= StoreNodes.size())
15523       return RV;
15524
15525     if (StartIdx)
15526       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15527
15528     // Scan the memory operations on the chain and find the first
15529     // non-consecutive store memory address.
15530     unsigned NumConsecutiveStores = 1;
15531     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15532     // Check that the addresses are consecutive starting from the second
15533     // element in the list of stores.
15534     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15535       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15536       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15537         break;
15538       NumConsecutiveStores = i + 1;
15539     }
15540
15541     if (NumConsecutiveStores < 2) {
15542       StoreNodes.erase(StoreNodes.begin(),
15543                        StoreNodes.begin() + NumConsecutiveStores);
15544       continue;
15545     }
15546
15547     // The node with the lowest store address.
15548     LLVMContext &Context = *DAG.getContext();
15549     const DataLayout &DL = DAG.getDataLayout();
15550
15551     // Store the constants into memory as one consecutive store.
15552     if (IsConstantSrc) {
15553       while (NumConsecutiveStores >= 2) {
15554         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15555         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15556         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15557         unsigned LastLegalType = 1;
15558         unsigned LastLegalVectorType = 1;
15559         bool LastIntegerTrunc = false;
15560         bool NonZero = false;
15561         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15562         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15563           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15564           SDValue StoredVal = ST->getValue();
15565           bool IsElementZero = false;
15566           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15567             IsElementZero = C->isNullValue();
15568           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15569             IsElementZero = C->getConstantFPValue()->isNullValue();
15570           if (IsElementZero) {
15571             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15572               FirstZeroAfterNonZero = i;
15573           }
15574           NonZero |= !IsElementZero;
15575
15576           // Find a legal type for the constant store.
15577           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15578           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15579           bool IsFast = false;
15580
15581           // Break early when size is too large to be legal.
15582           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15583             break;
15584
15585           if (TLI.isTypeLegal(StoreTy) &&
15586               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15587               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15588                                      *FirstInChain->getMemOperand(), &IsFast) &&
15589               IsFast) {
15590             LastIntegerTrunc = false;
15591             LastLegalType = i + 1;
15592             // Or check whether a truncstore is legal.
15593           } else if (TLI.getTypeAction(Context, StoreTy) ==
15594                      TargetLowering::TypePromoteInteger) {
15595             EVT LegalizedStoredValTy =
15596                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15597             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15598                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15599                 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15600                                        *FirstInChain->getMemOperand(),
15601                                        &IsFast) &&
15602                 IsFast) {
15603               LastIntegerTrunc = true;
15604               LastLegalType = i + 1;
15605             }
15606           }
15607
15608           // We only use vectors if the constant is known to be zero or the
15609           // target allows it and the function is not marked with the
15610           // noimplicitfloat attribute.
15611           if ((!NonZero ||
15612                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15613               !NoVectors) {
15614             // Find a legal type for the vector store.
15615             unsigned Elts = (i + 1) * NumMemElts;
15616             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15617             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15618                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15619                 TLI.allowsMemoryAccess(
15620                     Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15621                 IsFast)
15622               LastLegalVectorType = i + 1;
15623           }
15624         }
15625
15626         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15627         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15628
15629         // Check if we found a legal integer type that creates a meaningful
15630         // merge.
15631         if (NumElem < 2) {
15632           // We know that candidate stores are in order and of correct
15633           // shape. While there is no mergeable sequence from the
15634           // beginning one may start later in the sequence. The only
15635           // reason a merge of size N could have failed where another of
15636           // the same size would not have, is if the alignment has
15637           // improved or we've dropped a non-zero value. Drop as many
15638           // candidates as we can here.
15639           unsigned NumSkip = 1;
15640           while (
15641               (NumSkip < NumConsecutiveStores) &&
15642               (NumSkip < FirstZeroAfterNonZero) &&
15643               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15644             NumSkip++;
15645
15646           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15647           NumConsecutiveStores -= NumSkip;
15648           continue;
15649         }
15650
15651         // Check that we can merge these candidates without causing a cycle.
15652         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15653                                                       RootNode)) {
15654           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15655           NumConsecutiveStores -= NumElem;
15656           continue;
15657         }
15658
15659         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15660                                               UseVector, LastIntegerTrunc);
15661
15662         // Remove merged stores for next iteration.
15663         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15664         NumConsecutiveStores -= NumElem;
15665       }
15666       continue;
15667     }
15668
15669     // When extracting multiple vector elements, try to store them
15670     // in one vector store rather than a sequence of scalar stores.
15671     if (IsExtractVecSrc) {
15672       // Loop on Consecutive Stores on success.
15673       while (NumConsecutiveStores >= 2) {
15674         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15675         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15676         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15677         unsigned NumStoresToMerge = 1;
15678         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15679           // Find a legal type for the vector store.
15680           unsigned Elts = (i + 1) * NumMemElts;
15681           EVT Ty =
15682               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15683           bool IsFast;
15684
15685           // Break early when size is too large to be legal.
15686           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15687             break;
15688
15689           if (TLI.isTypeLegal(Ty) &&
15690               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15691               TLI.allowsMemoryAccess(Context, DL, Ty,
15692                                      *FirstInChain->getMemOperand(), &IsFast) &&
15693               IsFast)
15694             NumStoresToMerge = i + 1;
15695         }
15696
15697         // Check if we found a legal integer type creating a meaningful
15698         // merge.
15699         if (NumStoresToMerge < 2) {
15700           // We know that candidate stores are in order and of correct
15701           // shape. While there is no mergeable sequence from the
15702           // beginning one may start later in the sequence. The only
15703           // reason a merge of size N could have failed where another of
15704           // the same size would not have, is if the alignment has
15705           // improved. Drop as many candidates as we can here.
15706           unsigned NumSkip = 1;
15707           while (
15708               (NumSkip < NumConsecutiveStores) &&
15709               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15710             NumSkip++;
15711
15712           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15713           NumConsecutiveStores -= NumSkip;
15714           continue;
15715         }
15716
15717         // Check that we can merge these candidates without causing a cycle.
15718         if (!checkMergeStoreCandidatesForDependencies(
15719                 StoreNodes, NumStoresToMerge, RootNode)) {
15720           StoreNodes.erase(StoreNodes.begin(),
15721                            StoreNodes.begin() + NumStoresToMerge);
15722           NumConsecutiveStores -= NumStoresToMerge;
15723           continue;
15724         }
15725
15726         RV |= MergeStoresOfConstantsOrVecElts(
15727             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15728
15729         StoreNodes.erase(StoreNodes.begin(),
15730                          StoreNodes.begin() + NumStoresToMerge);
15731         NumConsecutiveStores -= NumStoresToMerge;
15732       }
15733       continue;
15734     }
15735
15736     // Below we handle the case of multiple consecutive stores that
15737     // come from multiple consecutive loads. We merge them into a single
15738     // wide load and a single wide store.
15739
15740     // Look for load nodes which are used by the stored values.
15741     SmallVector<MemOpLink, 8> LoadNodes;
15742
15743     // Find acceptable loads. Loads need to have the same chain (token factor),
15744     // must not be zext, volatile, indexed, and they must be consecutive.
15745     BaseIndexOffset LdBasePtr;
15746
15747     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15748       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15749       SDValue Val = peekThroughBitcasts(St->getValue());
15750       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15751
15752       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15753       // If this is not the first ptr that we check.
15754       int64_t LdOffset = 0;
15755       if (LdBasePtr.getBase().getNode()) {
15756         // The base ptr must be the same.
15757         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15758           break;
15759       } else {
15760         // Check that all other base pointers are the same as this one.
15761         LdBasePtr = LdPtr;
15762       }
15763
15764       // We found a potential memory operand to merge.
15765       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15766     }
15767
15768     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15769       // If we have load/store pair instructions and we only have two values,
15770       // don't bother merging.
15771       unsigned RequiredAlignment;
15772       if (LoadNodes.size() == 2 &&
15773           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15774           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15775         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15776         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15777         break;
15778       }
15779       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15780       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15781       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15782       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15783       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15784
15785       // Scan the memory operations on the chain and find the first
15786       // non-consecutive load memory address. These variables hold the index in
15787       // the store node array.
15788
15789       unsigned LastConsecutiveLoad = 1;
15790
15791       // This variable refers to the size and not index in the array.
15792       unsigned LastLegalVectorType = 1;
15793       unsigned LastLegalIntegerType = 1;
15794       bool isDereferenceable = true;
15795       bool DoIntegerTruncate = false;
15796       StartAddress = LoadNodes[0].OffsetFromBase;
15797       SDValue FirstChain = FirstLoad->getChain();
15798       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15799         // All loads must share the same chain.
15800         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15801           break;
15802
15803         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15804         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15805           break;
15806         LastConsecutiveLoad = i;
15807
15808         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15809           isDereferenceable = false;
15810
15811         // Find a legal type for the vector store.
15812         unsigned Elts = (i + 1) * NumMemElts;
15813         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15814
15815         // Break early when size is too large to be legal.
15816         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15817           break;
15818
15819         bool IsFastSt, IsFastLd;
15820         if (TLI.isTypeLegal(StoreTy) &&
15821             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15822             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15823                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15824             IsFastSt &&
15825             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15826                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15827             IsFastLd) {
15828           LastLegalVectorType = i + 1;
15829         }
15830
15831         // Find a legal type for the integer store.
15832         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15833         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15834         if (TLI.isTypeLegal(StoreTy) &&
15835             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15836             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15837                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15838             IsFastSt &&
15839             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15840                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15841             IsFastLd) {
15842           LastLegalIntegerType = i + 1;
15843           DoIntegerTruncate = false;
15844           // Or check whether a truncstore and extload is legal.
15845         } else if (TLI.getTypeAction(Context, StoreTy) ==
15846                    TargetLowering::TypePromoteInteger) {
15847           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15848           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15849               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15850               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15851                                  StoreTy) &&
15852               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15853                                  StoreTy) &&
15854               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15855               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15856                                      *FirstInChain->getMemOperand(),
15857                                      &IsFastSt) &&
15858               IsFastSt &&
15859               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15860                                      *FirstLoad->getMemOperand(), &IsFastLd) &&
15861               IsFastLd) {
15862             LastLegalIntegerType = i + 1;
15863             DoIntegerTruncate = true;
15864           }
15865         }
15866       }
15867
15868       // Only use vector types if the vector type is larger than the integer
15869       // type. If they are the same, use integers.
15870       bool UseVectorTy =
15871           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15872       unsigned LastLegalType =
15873           std::max(LastLegalVectorType, LastLegalIntegerType);
15874
15875       // We add +1 here because the LastXXX variables refer to location while
15876       // the NumElem refers to array/index size.
15877       unsigned NumElem =
15878           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15879       NumElem = std::min(LastLegalType, NumElem);
15880
15881       if (NumElem < 2) {
15882         // We know that candidate stores are in order and of correct
15883         // shape. While there is no mergeable sequence from the
15884         // beginning one may start later in the sequence. The only
15885         // reason a merge of size N could have failed where another of
15886         // the same size would not have is if the alignment or either
15887         // the load or store has improved. Drop as many candidates as we
15888         // can here.
15889         unsigned NumSkip = 1;
15890         while ((NumSkip < LoadNodes.size()) &&
15891                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
15892                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15893           NumSkip++;
15894         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15895         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
15896         NumConsecutiveStores -= NumSkip;
15897         continue;
15898       }
15899
15900       // Check that we can merge these candidates without causing a cycle.
15901       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15902                                                     RootNode)) {
15903         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15904         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15905         NumConsecutiveStores -= NumElem;
15906         continue;
15907       }
15908
15909       // Find if it is better to use vectors or integers to load and store
15910       // to memory.
15911       EVT JointMemOpVT;
15912       if (UseVectorTy) {
15913         // Find a legal type for the vector store.
15914         unsigned Elts = NumElem * NumMemElts;
15915         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15916       } else {
15917         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
15918         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
15919       }
15920
15921       SDLoc LoadDL(LoadNodes[0].MemNode);
15922       SDLoc StoreDL(StoreNodes[0].MemNode);
15923
15924       // The merged loads are required to have the same incoming chain, so
15925       // using the first's chain is acceptable.
15926
15927       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
15928       AddToWorklist(NewStoreChain.getNode());
15929
15930       MachineMemOperand::Flags LdMMOFlags =
15931           isDereferenceable ? MachineMemOperand::MODereferenceable
15932                             : MachineMemOperand::MONone;
15933       if (IsNonTemporalLoad)
15934         LdMMOFlags |= MachineMemOperand::MONonTemporal;
15935
15936       MachineMemOperand::Flags StMMOFlags =
15937           IsNonTemporalStore ? MachineMemOperand::MONonTemporal
15938                              : MachineMemOperand::MONone;
15939
15940       SDValue NewLoad, NewStore;
15941       if (UseVectorTy || !DoIntegerTruncate) {
15942         NewLoad =
15943             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
15944                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
15945                         FirstLoadAlign, LdMMOFlags);
15946         NewStore = DAG.getStore(
15947             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
15948             FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
15949       } else { // This must be the truncstore/extload case
15950         EVT ExtendedTy =
15951             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
15952         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
15953                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
15954                                  FirstLoad->getPointerInfo(), JointMemOpVT,
15955                                  FirstLoadAlign, LdMMOFlags);
15956         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
15957                                      FirstInChain->getBasePtr(),
15958                                      FirstInChain->getPointerInfo(),
15959                                      JointMemOpVT, FirstInChain->getAlignment(),
15960                                      FirstInChain->getMemOperand()->getFlags());
15961       }
15962
15963       // Transfer chain users from old loads to the new load.
15964       for (unsigned i = 0; i < NumElem; ++i) {
15965         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
15966         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
15967                                       SDValue(NewLoad.getNode(), 1));
15968       }
15969
15970       // Replace the all stores with the new store. Recursively remove
15971       // corresponding value if its no longer used.
15972       for (unsigned i = 0; i < NumElem; ++i) {
15973         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
15974         CombineTo(StoreNodes[i].MemNode, NewStore);
15975         if (Val.getNode()->use_empty())
15976           recursivelyDeleteUnusedNodes(Val.getNode());
15977       }
15978
15979       RV = true;
15980       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15981       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
15982       NumConsecutiveStores -= NumElem;
15983     }
15984   }
15985   return RV;
15986 }
15987
15988 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
15989   SDLoc SL(ST);
15990   SDValue ReplStore;
15991
15992   // Replace the chain to avoid dependency.
15993   if (ST->isTruncatingStore()) {
15994     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
15995                                   ST->getBasePtr(), ST->getMemoryVT(),
15996                                   ST->getMemOperand());
15997   } else {
15998     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
15999                              ST->getMemOperand());
16000   }
16001
16002   // Create token to keep both nodes around.
16003   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16004                               MVT::Other, ST->getChain(), ReplStore);
16005
16006   // Make sure the new and old chains are cleaned up.
16007   AddToWorklist(Token.getNode());
16008
16009   // Don't add users to work list.
16010   return CombineTo(ST, Token, false);
16011 }
16012
16013 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16014   SDValue Value = ST->getValue();
16015   if (Value.getOpcode() == ISD::TargetConstantFP)
16016     return SDValue();
16017
16018   SDLoc DL(ST);
16019
16020   SDValue Chain = ST->getChain();
16021   SDValue Ptr = ST->getBasePtr();
16022
16023   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16024
16025   // NOTE: If the original store is volatile, this transform must not increase
16026   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16027   // processor operation but an i64 (which is not legal) requires two.  So the
16028   // transform should not be done in this case.
16029
16030   SDValue Tmp;
16031   switch (CFP->getSimpleValueType(0).SimpleTy) {
16032   default:
16033     llvm_unreachable("Unknown FP type");
16034   case MVT::f16:    // We don't do this for these yet.
16035   case MVT::f80:
16036   case MVT::f128:
16037   case MVT::ppcf128:
16038     return SDValue();
16039   case MVT::f32:
16040     if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
16041         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16042       ;
16043       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16044                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16045                             MVT::i32);
16046       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16047     }
16048
16049     return SDValue();
16050   case MVT::f64:
16051     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16052          ST->isSimple()) ||
16053         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16054       ;
16055       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16056                             getZExtValue(), SDLoc(CFP), MVT::i64);
16057       return DAG.getStore(Chain, DL, Tmp,
16058                           Ptr, ST->getMemOperand());
16059     }
16060
16061     if (ST->isSimple() &&
16062         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16063       // Many FP stores are not made apparent until after legalize, e.g. for
16064       // argument passing.  Since this is so common, custom legalize the
16065       // 64-bit integer store into two 32-bit stores.
16066       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16067       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16068       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16069       if (DAG.getDataLayout().isBigEndian())
16070         std::swap(Lo, Hi);
16071
16072       unsigned Alignment = ST->getAlignment();
16073       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16074       AAMDNodes AAInfo = ST->getAAInfo();
16075
16076       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16077                                  ST->getAlignment(), MMOFlags, AAInfo);
16078       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16079                         DAG.getConstant(4, DL, Ptr.getValueType()));
16080       Alignment = MinAlign(Alignment, 4U);
16081       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16082                                  ST->getPointerInfo().getWithOffset(4),
16083                                  Alignment, MMOFlags, AAInfo);
16084       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16085                          St0, St1);
16086     }
16087
16088     return SDValue();
16089   }
16090 }
16091
16092 SDValue DAGCombiner::visitSTORE(SDNode *N) {
16093   StoreSDNode *ST  = cast<StoreSDNode>(N);
16094   SDValue Chain = ST->getChain();
16095   SDValue Value = ST->getValue();
16096   SDValue Ptr   = ST->getBasePtr();
16097
16098   // If this is a store of a bit convert, store the input value if the
16099   // resultant store does not need a higher alignment than the original.
16100   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
16101       ST->isUnindexed()) {
16102     EVT SVT = Value.getOperand(0).getValueType();
16103     // If the store is volatile, we only want to change the store type if the
16104     // resulting store is legal. Otherwise we might increase the number of
16105     // memory accesses. We don't care if the original type was legal or not
16106     // as we assume software couldn't rely on the number of accesses of an
16107     // illegal type.
16108     // TODO: May be able to relax for unordered atomics (see D66309)
16109     if (((!LegalOperations && ST->isSimple()) ||
16110          TLI.isOperationLegal(ISD::STORE, SVT)) &&
16111         TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
16112                                      DAG, *ST->getMemOperand())) {
16113       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
16114                           ST->getPointerInfo(), ST->getAlignment(),
16115                           ST->getMemOperand()->getFlags(), ST->getAAInfo());
16116     }
16117   }
16118
16119   // Turn 'store undef, Ptr' -> nothing.
16120   if (Value.isUndef() && ST->isUnindexed())
16121     return Chain;
16122
16123   // Try to infer better alignment information than the store already has.
16124   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
16125     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
16126       if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
16127         SDValue NewStore =
16128             DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
16129                               ST->getMemoryVT(), Align,
16130                               ST->getMemOperand()->getFlags(), ST->getAAInfo());
16131         // NewStore will always be N as we are only refining the alignment
16132         assert(NewStore.getNode() == N);
16133         (void)NewStore;
16134       }
16135     }
16136   }
16137
16138   // Try transforming a pair floating point load / store ops to integer
16139   // load / store ops.
16140   if (SDValue NewST = TransformFPLoadStorePair(N))
16141     return NewST;
16142
16143   // Try transforming several stores into STORE (BSWAP).
16144   if (SDValue Store = MatchStoreCombine(ST))
16145     return Store;
16146
16147   if (ST->isUnindexed()) {
16148     // Walk up chain skipping non-aliasing memory nodes, on this store and any
16149     // adjacent stores.
16150     if (findBetterNeighborChains(ST)) {
16151       // replaceStoreChain uses CombineTo, which handled all of the worklist
16152       // manipulation. Return the original node to not do anything else.
16153       return SDValue(ST, 0);
16154     }
16155     Chain = ST->getChain();
16156   }
16157
16158   // FIXME: is there such a thing as a truncating indexed store?
16159   if (ST->isTruncatingStore() && ST->isUnindexed() &&
16160       Value.getValueType().isInteger() &&
16161       (!isa<ConstantSDNode>(Value) ||
16162        !cast<ConstantSDNode>(Value)->isOpaque())) {
16163     APInt TruncDemandedBits =
16164         APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
16165                              ST->getMemoryVT().getScalarSizeInBits());
16166
16167     // See if we can simplify the input to this truncstore with knowledge that
16168     // only the low bits are being used.  For example:
16169     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
16170     AddToWorklist(Value.getNode());
16171     if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
16172       return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
16173                                ST->getMemOperand());
16174
16175     // Otherwise, see if we can simplify the operation with
16176     // SimplifyDemandedBits, which only works if the value has a single use.
16177     if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
16178       // Re-visit the store if anything changed and the store hasn't been merged
16179       // with another node (N is deleted) SimplifyDemandedBits will add Value's
16180       // node back to the worklist if necessary, but we also need to re-visit
16181       // the Store node itself.
16182       if (N->getOpcode() != ISD::DELETED_NODE)
16183         AddToWorklist(N);
16184       return SDValue(N, 0);
16185     }
16186   }
16187
16188   // If this is a load followed by a store to the same location, then the store
16189   // is dead/noop.
16190   // TODO: Can relax for unordered atomics (see D66309)
16191   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
16192     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
16193         ST->isUnindexed() && ST->isSimple() &&
16194         // There can't be any side effects between the load and store, such as
16195         // a call or store.
16196         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
16197       // The store is dead, remove it.
16198       return Chain;
16199     }
16200   }
16201
16202   // TODO: Can relax for unordered atomics (see D66309)
16203   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
16204     if (ST->isUnindexed() && ST->isSimple() &&
16205         ST1->isUnindexed() && ST1->isSimple()) {
16206       if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
16207           ST->getMemoryVT() == ST1->getMemoryVT()) {
16208         // If this is a store followed by a store with the same value to the
16209         // same location, then the store is dead/noop.
16210         return Chain;
16211       }
16212
16213       if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
16214           !ST1->getBasePtr().isUndef()) {
16215         const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
16216         const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
16217         unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
16218         unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
16219         // If this is a store who's preceding store to a subset of the current
16220         // location and no one other node is chained to that store we can
16221         // effectively drop the store. Do not remove stores to undef as they may
16222         // be used as data sinks.
16223         if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
16224           CombineTo(ST1, ST1->getChain());
16225           return SDValue();
16226         }
16227
16228         // If ST stores to a subset of preceding store's write set, we may be
16229         // able to fold ST's value into the preceding stored value. As we know
16230         // the other uses of ST1's chain are unconcerned with ST, this folding
16231         // will not affect those nodes.
16232         int64_t BitOffset;
16233         if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
16234                                BitOffset)) {
16235           SDValue ChainValue = ST1->getValue();
16236           if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
16237             if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
16238               APInt Val = C1->getAPIntValue();
16239               APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
16240               // FIXME: Handle Big-endian mode.
16241               if (!DAG.getDataLayout().isBigEndian()) {
16242                 Val.insertBits(InsertVal, BitOffset);
16243                 SDValue NewSDVal =
16244                     DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
16245                                     C1->isTargetOpcode(), C1->isOpaque());
16246                 SDNode *NewST1 = DAG.UpdateNodeOperands(
16247                     ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
16248                     ST1->getOperand(3));
16249                 return CombineTo(ST, SDValue(NewST1, 0));
16250               }
16251             }
16252           }
16253         } // End ST subset of ST1 case.
16254       }
16255     }
16256   }
16257
16258   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
16259   // truncating store.  We can do this even if this is already a truncstore.
16260   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
16261       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
16262       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
16263                             ST->getMemoryVT())) {
16264     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
16265                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
16266   }
16267
16268   // Always perform this optimization before types are legal. If the target
16269   // prefers, also try this after legalization to catch stores that were created
16270   // by intrinsics or other nodes.
16271   if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
16272     while (true) {
16273       // There can be multiple store sequences on the same chain.
16274       // Keep trying to merge store sequences until we are unable to do so
16275       // or until we merge the last store on the chain.
16276       bool Changed = MergeConsecutiveStores(ST);
16277       if (!Changed) break;
16278       // Return N as merge only uses CombineTo and no worklist clean
16279       // up is necessary.
16280       if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
16281         return SDValue(N, 0);
16282     }
16283   }
16284
16285   // Try transforming N to an indexed store.
16286   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
16287     return SDValue(N, 0);
16288
16289   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
16290   //
16291   // Make sure to do this only after attempting to merge stores in order to
16292   //  avoid changing the types of some subset of stores due to visit order,
16293   //  preventing their merging.
16294   if (isa<ConstantFPSDNode>(ST->getValue())) {
16295     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
16296       return NewSt;
16297   }
16298
16299   if (SDValue NewSt = splitMergedValStore(ST))
16300     return NewSt;
16301
16302   return ReduceLoadOpStoreWidth(N);
16303 }
16304
16305 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16306   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16307   if (!LifetimeEnd->hasOffset())
16308     return SDValue();
16309
16310   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16311                                         LifetimeEnd->getOffset(), false);
16312
16313   // We walk up the chains to find stores.
16314   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16315   while (!Chains.empty()) {
16316     SDValue Chain = Chains.back();
16317     Chains.pop_back();
16318     if (!Chain.hasOneUse())
16319       continue;
16320     switch (Chain.getOpcode()) {
16321     case ISD::TokenFactor:
16322       for (unsigned Nops = Chain.getNumOperands(); Nops;)
16323         Chains.push_back(Chain.getOperand(--Nops));
16324       break;
16325     case ISD::LIFETIME_START:
16326     case ISD::LIFETIME_END:
16327       // We can forward past any lifetime start/end that can be proven not to
16328       // alias the node.
16329       if (!isAlias(Chain.getNode(), N))
16330         Chains.push_back(Chain.getOperand(0));
16331       break;
16332     case ISD::STORE: {
16333       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16334       // TODO: Can relax for unordered atomics (see D66309)
16335       if (!ST->isSimple() || ST->isIndexed())
16336         continue;
16337       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16338       // If we store purely within object bounds just before its lifetime ends,
16339       // we can remove the store.
16340       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16341                                    ST->getMemoryVT().getStoreSizeInBits())) {
16342         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16343                    dbgs() << "\nwithin LIFETIME_END of : ";
16344                    LifetimeEndBase.dump(); dbgs() << "\n");
16345         CombineTo(ST, ST->getChain());
16346         return SDValue(N, 0);
16347       }
16348     }
16349     }
16350   }
16351   return SDValue();
16352 }
16353
16354 /// For the instruction sequence of store below, F and I values
16355 /// are bundled together as an i64 value before being stored into memory.
16356 /// Sometimes it is more efficent to generate separate stores for F and I,
16357 /// which can remove the bitwise instructions or sink them to colder places.
16358 ///
16359 ///   (store (or (zext (bitcast F to i32) to i64),
16360 ///              (shl (zext I to i64), 32)), addr)  -->
16361 ///   (store F, addr) and (store I, addr+4)
16362 ///
16363 /// Similarly, splitting for other merged store can also be beneficial, like:
16364 /// For pair of {i32, i32}, i64 store --> two i32 stores.
16365 /// For pair of {i32, i16}, i64 store --> two i32 stores.
16366 /// For pair of {i16, i16}, i32 store --> two i16 stores.
16367 /// For pair of {i16, i8},  i32 store --> two i16 stores.
16368 /// For pair of {i8, i8},   i16 store --> two i8 stores.
16369 ///
16370 /// We allow each target to determine specifically which kind of splitting is
16371 /// supported.
16372 ///
16373 /// The store patterns are commonly seen from the simple code snippet below
16374 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16375 ///   void goo(const std::pair<int, float> &);
16376 ///   hoo() {
16377 ///     ...
16378 ///     goo(std::make_pair(tmp, ftmp));
16379 ///     ...
16380 ///   }
16381 ///
16382 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16383   if (OptLevel == CodeGenOpt::None)
16384     return SDValue();
16385
16386   // Can't change the number of memory accesses for a volatile store or break
16387   // atomicity for an atomic one.
16388   if (!ST->isSimple())
16389     return SDValue();
16390
16391   SDValue Val = ST->getValue();
16392   SDLoc DL(ST);
16393
16394   // Match OR operand.
16395   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16396     return SDValue();
16397
16398   // Match SHL operand and get Lower and Higher parts of Val.
16399   SDValue Op1 = Val.getOperand(0);
16400   SDValue Op2 = Val.getOperand(1);
16401   SDValue Lo, Hi;
16402   if (Op1.getOpcode() != ISD::SHL) {
16403     std::swap(Op1, Op2);
16404     if (Op1.getOpcode() != ISD::SHL)
16405       return SDValue();
16406   }
16407   Lo = Op2;
16408   Hi = Op1.getOperand(0);
16409   if (!Op1.hasOneUse())
16410     return SDValue();
16411
16412   // Match shift amount to HalfValBitSize.
16413   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16414   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16415   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16416     return SDValue();
16417
16418   // Lo and Hi are zero-extended from int with size less equal than 32
16419   // to i64.
16420   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16421       !Lo.getOperand(0).getValueType().isScalarInteger() ||
16422       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16423       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16424       !Hi.getOperand(0).getValueType().isScalarInteger() ||
16425       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16426     return SDValue();
16427
16428   // Use the EVT of low and high parts before bitcast as the input
16429   // of target query.
16430   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16431                   ? Lo.getOperand(0).getValueType()
16432                   : Lo.getValueType();
16433   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16434                    ? Hi.getOperand(0).getValueType()
16435                    : Hi.getValueType();
16436   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16437     return SDValue();
16438
16439   // Start to split store.
16440   unsigned Alignment = ST->getAlignment();
16441   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16442   AAMDNodes AAInfo = ST->getAAInfo();
16443
16444   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16445   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16446   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16447   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16448
16449   SDValue Chain = ST->getChain();
16450   SDValue Ptr = ST->getBasePtr();
16451   // Lower value store.
16452   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16453                              ST->getAlignment(), MMOFlags, AAInfo);
16454   Ptr =
16455       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16456                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16457   // Higher value store.
16458   SDValue St1 =
16459       DAG.getStore(St0, DL, Hi, Ptr,
16460                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16461                    Alignment / 2, MMOFlags, AAInfo);
16462   return St1;
16463 }
16464
16465 /// Convert a disguised subvector insertion into a shuffle:
16466 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16467   SDValue InsertVal = N->getOperand(1);
16468   SDValue Vec = N->getOperand(0);
16469
16470   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
16471   //   --> (vector_shuffle X, Y)
16472   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
16473       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16474       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
16475     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
16476     ArrayRef<int> Mask = SVN->getMask();
16477
16478     SDValue X = Vec.getOperand(0);
16479     SDValue Y = Vec.getOperand(1);
16480
16481     // Vec's operand 0 is using indices from 0 to N-1 and
16482     // operand 1 from N to 2N - 1, where N is the number of
16483     // elements in the vectors.
16484     int XOffset = -1;
16485     if (InsertVal.getOperand(0) == X) {
16486       XOffset = 0;
16487     } else if (InsertVal.getOperand(0) == Y) {
16488       XOffset = X.getValueType().getVectorNumElements();
16489     }
16490
16491     if (XOffset != -1) {
16492       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
16493
16494       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
16495       NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
16496       assert(NewMask[InsIndex] <
16497                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
16498              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
16499
16500       SDValue LegalShuffle =
16501               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
16502                                           Y, NewMask, DAG);
16503       if (LegalShuffle)
16504         return LegalShuffle;
16505     }
16506   }
16507
16508   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
16509   // bitcast(shuffle (bitcast V), (extended X), Mask)
16510   // Note: We do not use an insert_subvector node because that requires a
16511   // legal subvector type.
16512   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16513       !InsertVal.getOperand(0).getValueType().isVector())
16514     return SDValue();
16515
16516   SDValue SubVec = InsertVal.getOperand(0);
16517   SDValue DestVec = N->getOperand(0);
16518   EVT SubVecVT = SubVec.getValueType();
16519   EVT VT = DestVec.getValueType();
16520   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16521   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16522   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16523
16524   // Step 1: Create a shuffle mask that implements this insert operation. The
16525   // vector that we are inserting into will be operand 0 of the shuffle, so
16526   // those elements are just 'i'. The inserted subvector is in the first
16527   // positions of operand 1 of the shuffle. Example:
16528   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16529   SmallVector<int, 16> Mask(NumMaskVals);
16530   for (unsigned i = 0; i != NumMaskVals; ++i) {
16531     if (i / NumSrcElts == InsIndex)
16532       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16533     else
16534       Mask[i] = i;
16535   }
16536
16537   // Bail out if the target can not handle the shuffle we want to create.
16538   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16539   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16540   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16541     return SDValue();
16542
16543   // Step 2: Create a wide vector from the inserted source vector by appending
16544   // undefined elements. This is the same size as our destination vector.
16545   SDLoc DL(N);
16546   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16547   ConcatOps[0] = SubVec;
16548   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16549
16550   // Step 3: Shuffle in the padded subvector.
16551   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16552   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16553   AddToWorklist(PaddedSubV.getNode());
16554   AddToWorklist(DestVecBC.getNode());
16555   AddToWorklist(Shuf.getNode());
16556   return DAG.getBitcast(VT, Shuf);
16557 }
16558
16559 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16560   SDValue InVec = N->getOperand(0);
16561   SDValue InVal = N->getOperand(1);
16562   SDValue EltNo = N->getOperand(2);
16563   SDLoc DL(N);
16564
16565   // If the inserted element is an UNDEF, just use the input vector.
16566   if (InVal.isUndef())
16567     return InVec;
16568
16569   EVT VT = InVec.getValueType();
16570   unsigned NumElts = VT.getVectorNumElements();
16571
16572   // Remove redundant insertions:
16573   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16574   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16575       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16576     return InVec;
16577
16578   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16579   if (!IndexC) {
16580     // If this is variable insert to undef vector, it might be better to splat:
16581     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16582     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16583       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16584       return DAG.getBuildVector(VT, DL, Ops);
16585     }
16586     return SDValue();
16587   }
16588
16589   // We must know which element is being inserted for folds below here.
16590   unsigned Elt = IndexC->getZExtValue();
16591   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16592     return Shuf;
16593
16594   // Canonicalize insert_vector_elt dag nodes.
16595   // Example:
16596   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16597   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16598   //
16599   // Do this only if the child insert_vector node has one use; also
16600   // do this only if indices are both constants and Idx1 < Idx0.
16601   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16602       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16603     unsigned OtherElt = InVec.getConstantOperandVal(2);
16604     if (Elt < OtherElt) {
16605       // Swap nodes.
16606       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16607                                   InVec.getOperand(0), InVal, EltNo);
16608       AddToWorklist(NewOp.getNode());
16609       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16610                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16611     }
16612   }
16613
16614   // If we can't generate a legal BUILD_VECTOR, exit
16615   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16616     return SDValue();
16617
16618   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16619   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16620   // vector elements.
16621   SmallVector<SDValue, 8> Ops;
16622   // Do not combine these two vectors if the output vector will not replace
16623   // the input vector.
16624   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16625     Ops.append(InVec.getNode()->op_begin(),
16626                InVec.getNode()->op_end());
16627   } else if (InVec.isUndef()) {
16628     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16629   } else {
16630     return SDValue();
16631   }
16632   assert(Ops.size() == NumElts && "Unexpected vector size");
16633
16634   // Insert the element
16635   if (Elt < Ops.size()) {
16636     // All the operands of BUILD_VECTOR must have the same type;
16637     // we enforce that here.
16638     EVT OpVT = Ops[0].getValueType();
16639     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16640   }
16641
16642   // Return the new vector
16643   return DAG.getBuildVector(VT, DL, Ops);
16644 }
16645
16646 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16647                                                   SDValue EltNo,
16648                                                   LoadSDNode *OriginalLoad) {
16649   assert(OriginalLoad->isSimple());
16650
16651   EVT ResultVT = EVE->getValueType(0);
16652   EVT VecEltVT = InVecVT.getVectorElementType();
16653   unsigned Align = OriginalLoad->getAlignment();
16654   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16655       VecEltVT.getTypeForEVT(*DAG.getContext()));
16656
16657   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16658     return SDValue();
16659
16660   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16661     ISD::NON_EXTLOAD : ISD::EXTLOAD;
16662   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16663     return SDValue();
16664
16665   Align = NewAlign;
16666
16667   SDValue NewPtr = OriginalLoad->getBasePtr();
16668   SDValue Offset;
16669   EVT PtrType = NewPtr.getValueType();
16670   MachinePointerInfo MPI;
16671   SDLoc DL(EVE);
16672   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16673     int Elt = ConstEltNo->getZExtValue();
16674     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16675     Offset = DAG.getConstant(PtrOff, DL, PtrType);
16676     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16677   } else {
16678     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16679     Offset = DAG.getNode(
16680         ISD::MUL, DL, PtrType, Offset,
16681         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16682     // Discard the pointer info except the address space because the memory
16683     // operand can't represent this new access since the offset is variable.
16684     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16685   }
16686   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16687
16688   // The replacement we need to do here is a little tricky: we need to
16689   // replace an extractelement of a load with a load.
16690   // Use ReplaceAllUsesOfValuesWith to do the replacement.
16691   // Note that this replacement assumes that the extractvalue is the only
16692   // use of the load; that's okay because we don't want to perform this
16693   // transformation in other cases anyway.
16694   SDValue Load;
16695   SDValue Chain;
16696   if (ResultVT.bitsGT(VecEltVT)) {
16697     // If the result type of vextract is wider than the load, then issue an
16698     // extending load instead.
16699     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16700                                                   VecEltVT)
16701                                    ? ISD::ZEXTLOAD
16702                                    : ISD::EXTLOAD;
16703     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16704                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16705                           Align, OriginalLoad->getMemOperand()->getFlags(),
16706                           OriginalLoad->getAAInfo());
16707     Chain = Load.getValue(1);
16708   } else {
16709     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16710                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16711                        OriginalLoad->getAAInfo());
16712     Chain = Load.getValue(1);
16713     if (ResultVT.bitsLT(VecEltVT))
16714       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16715     else
16716       Load = DAG.getBitcast(ResultVT, Load);
16717   }
16718   WorklistRemover DeadNodes(*this);
16719   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16720   SDValue To[] = { Load, Chain };
16721   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16722   // Make sure to revisit this node to clean it up; it will usually be dead.
16723   AddToWorklist(EVE);
16724   // Since we're explicitly calling ReplaceAllUses, add the new node to the
16725   // worklist explicitly as well.
16726   AddUsersToWorklist(Load.getNode()); // Add users too
16727   AddToWorklist(Load.getNode());
16728   ++OpsNarrowed;
16729   return SDValue(EVE, 0);
16730 }
16731
16732 /// Transform a vector binary operation into a scalar binary operation by moving
16733 /// the math/logic after an extract element of a vector.
16734 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16735                                        bool LegalOperations) {
16736   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16737   SDValue Vec = ExtElt->getOperand(0);
16738   SDValue Index = ExtElt->getOperand(1);
16739   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16740   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16741       Vec.getNode()->getNumValues() != 1)
16742     return SDValue();
16743
16744   // Targets may want to avoid this to prevent an expensive register transfer.
16745   if (!TLI.shouldScalarizeBinop(Vec))
16746     return SDValue();
16747
16748   // Extracting an element of a vector constant is constant-folded, so this
16749   // transform is just replacing a vector op with a scalar op while moving the
16750   // extract.
16751   SDValue Op0 = Vec.getOperand(0);
16752   SDValue Op1 = Vec.getOperand(1);
16753   if (isAnyConstantBuildVector(Op0, true) ||
16754       isAnyConstantBuildVector(Op1, true)) {
16755     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16756     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16757     SDLoc DL(ExtElt);
16758     EVT VT = ExtElt->getValueType(0);
16759     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16760     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16761     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16762   }
16763
16764   return SDValue();
16765 }
16766
16767 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16768   SDValue VecOp = N->getOperand(0);
16769   SDValue Index = N->getOperand(1);
16770   EVT ScalarVT = N->getValueType(0);
16771   EVT VecVT = VecOp.getValueType();
16772   if (VecOp.isUndef())
16773     return DAG.getUNDEF(ScalarVT);
16774
16775   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16776   //
16777   // This only really matters if the index is non-constant since other combines
16778   // on the constant elements already work.
16779   SDLoc DL(N);
16780   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16781       Index == VecOp.getOperand(2)) {
16782     SDValue Elt = VecOp.getOperand(1);
16783     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16784   }
16785
16786   // (vextract (scalar_to_vector val, 0) -> val
16787   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16788     // Check if the result type doesn't match the inserted element type. A
16789     // SCALAR_TO_VECTOR may truncate the inserted element and the
16790     // EXTRACT_VECTOR_ELT may widen the extracted vector.
16791     SDValue InOp = VecOp.getOperand(0);
16792     if (InOp.getValueType() != ScalarVT) {
16793       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16794       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16795     }
16796     return InOp;
16797   }
16798
16799   // extract_vector_elt of out-of-bounds element -> UNDEF
16800   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16801   unsigned NumElts = VecVT.getVectorNumElements();
16802   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16803     return DAG.getUNDEF(ScalarVT);
16804
16805   // extract_vector_elt (build_vector x, y), 1 -> y
16806   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16807       TLI.isTypeLegal(VecVT) &&
16808       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16809     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16810     EVT InEltVT = Elt.getValueType();
16811
16812     // Sometimes build_vector's scalar input types do not match result type.
16813     if (ScalarVT == InEltVT)
16814       return Elt;
16815
16816     // TODO: It may be useful to truncate if free if the build_vector implicitly
16817     // converts.
16818   }
16819
16820   // TODO: These transforms should not require the 'hasOneUse' restriction, but
16821   // there are regressions on multiple targets without it. We can end up with a
16822   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16823   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16824       VecOp.hasOneUse()) {
16825     // The vector index of the LSBs of the source depend on the endian-ness.
16826     bool IsLE = DAG.getDataLayout().isLittleEndian();
16827     unsigned ExtractIndex = IndexC->getZExtValue();
16828     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16829     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16830     SDValue BCSrc = VecOp.getOperand(0);
16831     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16832       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16833
16834     if (LegalTypes && BCSrc.getValueType().isInteger() &&
16835         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16836       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16837       // trunc i64 X to i32
16838       SDValue X = BCSrc.getOperand(0);
16839       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16840              "Extract element and scalar to vector can't change element type "
16841              "from FP to integer.");
16842       unsigned XBitWidth = X.getValueSizeInBits();
16843       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16844       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16845
16846       // An extract element return value type can be wider than its vector
16847       // operand element type. In that case, the high bits are undefined, so
16848       // it's possible that we may need to extend rather than truncate.
16849       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16850         assert(XBitWidth % VecEltBitWidth == 0 &&
16851                "Scalar bitwidth must be a multiple of vector element bitwidth");
16852         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16853       }
16854     }
16855   }
16856
16857   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16858     return BO;
16859
16860   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16861   // We only perform this optimization before the op legalization phase because
16862   // we may introduce new vector instructions which are not backed by TD
16863   // patterns. For example on AVX, extracting elements from a wide vector
16864   // without using extract_subvector. However, if we can find an underlying
16865   // scalar value, then we can always use that.
16866   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16867     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16868     // Find the new index to extract from.
16869     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16870
16871     // Extracting an undef index is undef.
16872     if (OrigElt == -1)
16873       return DAG.getUNDEF(ScalarVT);
16874
16875     // Select the right vector half to extract from.
16876     SDValue SVInVec;
16877     if (OrigElt < (int)NumElts) {
16878       SVInVec = VecOp.getOperand(0);
16879     } else {
16880       SVInVec = VecOp.getOperand(1);
16881       OrigElt -= NumElts;
16882     }
16883
16884     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16885       SDValue InOp = SVInVec.getOperand(OrigElt);
16886       if (InOp.getValueType() != ScalarVT) {
16887         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16888         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16889       }
16890
16891       return InOp;
16892     }
16893
16894     // FIXME: We should handle recursing on other vector shuffles and
16895     // scalar_to_vector here as well.
16896
16897     if (!LegalOperations ||
16898         // FIXME: Should really be just isOperationLegalOrCustom.
16899         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
16900         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
16901       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
16902       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
16903                          DAG.getConstant(OrigElt, DL, IndexTy));
16904     }
16905   }
16906
16907   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
16908   // simplify it based on the (valid) extraction indices.
16909   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
16910         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16911                Use->getOperand(0) == VecOp &&
16912                isa<ConstantSDNode>(Use->getOperand(1));
16913       })) {
16914     APInt DemandedElts = APInt::getNullValue(NumElts);
16915     for (SDNode *Use : VecOp->uses()) {
16916       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
16917       if (CstElt->getAPIntValue().ult(NumElts))
16918         DemandedElts.setBit(CstElt->getZExtValue());
16919     }
16920     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
16921       // We simplified the vector operand of this extract element. If this
16922       // extract is not dead, visit it again so it is folded properly.
16923       if (N->getOpcode() != ISD::DELETED_NODE)
16924         AddToWorklist(N);
16925       return SDValue(N, 0);
16926     }
16927   }
16928
16929   // Everything under here is trying to match an extract of a loaded value.
16930   // If the result of load has to be truncated, then it's not necessarily
16931   // profitable.
16932   bool BCNumEltsChanged = false;
16933   EVT ExtVT = VecVT.getVectorElementType();
16934   EVT LVT = ExtVT;
16935   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
16936     return SDValue();
16937
16938   if (VecOp.getOpcode() == ISD::BITCAST) {
16939     // Don't duplicate a load with other uses.
16940     if (!VecOp.hasOneUse())
16941       return SDValue();
16942
16943     EVT BCVT = VecOp.getOperand(0).getValueType();
16944     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
16945       return SDValue();
16946     if (NumElts != BCVT.getVectorNumElements())
16947       BCNumEltsChanged = true;
16948     VecOp = VecOp.getOperand(0);
16949     ExtVT = BCVT.getVectorElementType();
16950   }
16951
16952   // extract (vector load $addr), i --> load $addr + i * size
16953   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
16954       ISD::isNormalLoad(VecOp.getNode()) &&
16955       !Index->hasPredecessor(VecOp.getNode())) {
16956     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
16957     if (VecLoad && VecLoad->isSimple())
16958       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
16959   }
16960
16961   // Perform only after legalization to ensure build_vector / vector_shuffle
16962   // optimizations have already been done.
16963   if (!LegalOperations || !IndexC)
16964     return SDValue();
16965
16966   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
16967   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
16968   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
16969   int Elt = IndexC->getZExtValue();
16970   LoadSDNode *LN0 = nullptr;
16971   if (ISD::isNormalLoad(VecOp.getNode())) {
16972     LN0 = cast<LoadSDNode>(VecOp);
16973   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
16974              VecOp.getOperand(0).getValueType() == ExtVT &&
16975              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
16976     // Don't duplicate a load with other uses.
16977     if (!VecOp.hasOneUse())
16978       return SDValue();
16979
16980     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
16981   }
16982   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
16983     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
16984     // =>
16985     // (load $addr+1*size)
16986
16987     // Don't duplicate a load with other uses.
16988     if (!VecOp.hasOneUse())
16989       return SDValue();
16990
16991     // If the bit convert changed the number of elements, it is unsafe
16992     // to examine the mask.
16993     if (BCNumEltsChanged)
16994       return SDValue();
16995
16996     // Select the input vector, guarding against out of range extract vector.
16997     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
16998     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
16999
17000     if (VecOp.getOpcode() == ISD::BITCAST) {
17001       // Don't duplicate a load with other uses.
17002       if (!VecOp.hasOneUse())
17003         return SDValue();
17004
17005       VecOp = VecOp.getOperand(0);
17006     }
17007     if (ISD::isNormalLoad(VecOp.getNode())) {
17008       LN0 = cast<LoadSDNode>(VecOp);
17009       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17010       Index = DAG.getConstant(Elt, DL, Index.getValueType());
17011     }
17012   }
17013
17014   // Make sure we found a non-volatile load and the extractelement is
17015   // the only use.
17016   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
17017     return SDValue();
17018
17019   // If Idx was -1 above, Elt is going to be -1, so just return undef.
17020   if (Elt == -1)
17021     return DAG.getUNDEF(LVT);
17022
17023   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17024 }
17025
17026 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17027 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17028   // We perform this optimization post type-legalization because
17029   // the type-legalizer often scalarizes integer-promoted vectors.
17030   // Performing this optimization before may create bit-casts which
17031   // will be type-legalized to complex code sequences.
17032   // We perform this optimization only before the operation legalizer because we
17033   // may introduce illegal operations.
17034   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17035     return SDValue();
17036
17037   unsigned NumInScalars = N->getNumOperands();
17038   SDLoc DL(N);
17039   EVT VT = N->getValueType(0);
17040
17041   // Check to see if this is a BUILD_VECTOR of a bunch of values
17042   // which come from any_extend or zero_extend nodes. If so, we can create
17043   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17044   // optimizations. We do not handle sign-extend because we can't fill the sign
17045   // using shuffles.
17046   EVT SourceType = MVT::Other;
17047   bool AllAnyExt = true;
17048
17049   for (unsigned i = 0; i != NumInScalars; ++i) {
17050     SDValue In = N->getOperand(i);
17051     // Ignore undef inputs.
17052     if (In.isUndef()) continue;
17053
17054     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
17055     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17056
17057     // Abort if the element is not an extension.
17058     if (!ZeroExt && !AnyExt) {
17059       SourceType = MVT::Other;
17060       break;
17061     }
17062
17063     // The input is a ZeroExt or AnyExt. Check the original type.
17064     EVT InTy = In.getOperand(0).getValueType();
17065
17066     // Check that all of the widened source types are the same.
17067     if (SourceType == MVT::Other)
17068       // First time.
17069       SourceType = InTy;
17070     else if (InTy != SourceType) {
17071       // Multiple income types. Abort.
17072       SourceType = MVT::Other;
17073       break;
17074     }
17075
17076     // Check if all of the extends are ANY_EXTENDs.
17077     AllAnyExt &= AnyExt;
17078   }
17079
17080   // In order to have valid types, all of the inputs must be extended from the
17081   // same source type and all of the inputs must be any or zero extend.
17082   // Scalar sizes must be a power of two.
17083   EVT OutScalarTy = VT.getScalarType();
17084   bool ValidTypes = SourceType != MVT::Other &&
17085                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17086                  isPowerOf2_32(SourceType.getSizeInBits());
17087
17088   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17089   // turn into a single shuffle instruction.
17090   if (!ValidTypes)
17091     return SDValue();
17092
17093   bool isLE = DAG.getDataLayout().isLittleEndian();
17094   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17095   assert(ElemRatio > 1 && "Invalid element size ratio");
17096   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17097                                DAG.getConstant(0, DL, SourceType);
17098
17099   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17100   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17101
17102   // Populate the new build_vector
17103   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17104     SDValue Cast = N->getOperand(i);
17105     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17106             Cast.getOpcode() == ISD::ZERO_EXTEND ||
17107             Cast.isUndef()) && "Invalid cast opcode");
17108     SDValue In;
17109     if (Cast.isUndef())
17110       In = DAG.getUNDEF(SourceType);
17111     else
17112       In = Cast->getOperand(0);
17113     unsigned Index = isLE ? (i * ElemRatio) :
17114                             (i * ElemRatio + (ElemRatio - 1));
17115
17116     assert(Index < Ops.size() && "Invalid index");
17117     Ops[Index] = In;
17118   }
17119
17120   // The type of the new BUILD_VECTOR node.
17121   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17122   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17123          "Invalid vector size");
17124   // Check if the new vector type is legal.
17125   if (!isTypeLegal(VecVT) ||
17126       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17127        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17128     return SDValue();
17129
17130   // Make the new BUILD_VECTOR.
17131   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17132
17133   // The new BUILD_VECTOR node has the potential to be further optimized.
17134   AddToWorklist(BV.getNode());
17135   // Bitcast to the desired type.
17136   return DAG.getBitcast(VT, BV);
17137 }
17138
17139 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17140                                            ArrayRef<int> VectorMask,
17141                                            SDValue VecIn1, SDValue VecIn2,
17142                                            unsigned LeftIdx, bool DidSplitVec) {
17143   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17144   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
17145
17146   EVT VT = N->getValueType(0);
17147   EVT InVT1 = VecIn1.getValueType();
17148   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17149
17150   unsigned NumElems = VT.getVectorNumElements();
17151   unsigned ShuffleNumElems = NumElems;
17152
17153   // If we artificially split a vector in two already, then the offsets in the
17154   // operands will all be based off of VecIn1, even those in VecIn2.
17155   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17156
17157   // We can't generate a shuffle node with mismatched input and output types.
17158   // Try to make the types match the type of the output.
17159   if (InVT1 != VT || InVT2 != VT) {
17160     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17161       // If the output vector length is a multiple of both input lengths,
17162       // we can concatenate them and pad the rest with undefs.
17163       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17164       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17165       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17166       ConcatOps[0] = VecIn1;
17167       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17168       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17169       VecIn2 = SDValue();
17170     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17171       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17172         return SDValue();
17173
17174       if (!VecIn2.getNode()) {
17175         // If we only have one input vector, and it's twice the size of the
17176         // output, split it in two.
17177         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17178                              DAG.getConstant(NumElems, DL, IdxTy));
17179         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17180         // Since we now have shorter input vectors, adjust the offset of the
17181         // second vector's start.
17182         Vec2Offset = NumElems;
17183       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17184         // VecIn1 is wider than the output, and we have another, possibly
17185         // smaller input. Pad the smaller input with undefs, shuffle at the
17186         // input vector width, and extract the output.
17187         // The shuffle type is different than VT, so check legality again.
17188         if (LegalOperations &&
17189             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
17190           return SDValue();
17191
17192         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17193         // lower it back into a BUILD_VECTOR. So if the inserted type is
17194         // illegal, don't even try.
17195         if (InVT1 != InVT2) {
17196           if (!TLI.isTypeLegal(InVT2))
17197             return SDValue();
17198           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17199                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17200         }
17201         ShuffleNumElems = NumElems * 2;
17202       } else {
17203         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17204         // than VecIn1. We can't handle this for now - this case will disappear
17205         // when we start sorting the vectors by type.
17206         return SDValue();
17207       }
17208     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17209                InVT1.getSizeInBits() == VT.getSizeInBits()) {
17210       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17211       ConcatOps[0] = VecIn2;
17212       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17213     } else {
17214       // TODO: Support cases where the length mismatch isn't exactly by a
17215       // factor of 2.
17216       // TODO: Move this check upwards, so that if we have bad type
17217       // mismatches, we don't create any DAG nodes.
17218       return SDValue();
17219     }
17220   }
17221
17222   // Initialize mask to undef.
17223   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17224
17225   // Only need to run up to the number of elements actually used, not the
17226   // total number of elements in the shuffle - if we are shuffling a wider
17227   // vector, the high lanes should be set to undef.
17228   for (unsigned i = 0; i != NumElems; ++i) {
17229     if (VectorMask[i] <= 0)
17230       continue;
17231
17232     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17233     if (VectorMask[i] == (int)LeftIdx) {
17234       Mask[i] = ExtIndex;
17235     } else if (VectorMask[i] == (int)LeftIdx + 1) {
17236       Mask[i] = Vec2Offset + ExtIndex;
17237     }
17238   }
17239
17240   // The type the input vectors may have changed above.
17241   InVT1 = VecIn1.getValueType();
17242
17243   // If we already have a VecIn2, it should have the same type as VecIn1.
17244   // If we don't, get an undef/zero vector of the appropriate type.
17245   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17246   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17247
17248   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17249   if (ShuffleNumElems > NumElems)
17250     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17251
17252   return Shuffle;
17253 }
17254
17255 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17256   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17257
17258   // First, determine where the build vector is not undef.
17259   // TODO: We could extend this to handle zero elements as well as undefs.
17260   int NumBVOps = BV->getNumOperands();
17261   int ZextElt = -1;
17262   for (int i = 0; i != NumBVOps; ++i) {
17263     SDValue Op = BV->getOperand(i);
17264     if (Op.isUndef())
17265       continue;
17266     if (ZextElt == -1)
17267       ZextElt = i;
17268     else
17269       return SDValue();
17270   }
17271   // Bail out if there's no non-undef element.
17272   if (ZextElt == -1)
17273     return SDValue();
17274
17275   // The build vector contains some number of undef elements and exactly
17276   // one other element. That other element must be a zero-extended scalar
17277   // extracted from a vector at a constant index to turn this into a shuffle.
17278   // Also, require that the build vector does not implicitly truncate/extend
17279   // its elements.
17280   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17281   EVT VT = BV->getValueType(0);
17282   SDValue Zext = BV->getOperand(ZextElt);
17283   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17284       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17285       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17286       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17287     return SDValue();
17288
17289   // The zero-extend must be a multiple of the source size, and we must be
17290   // building a vector of the same size as the source of the extract element.
17291   SDValue Extract = Zext.getOperand(0);
17292   unsigned DestSize = Zext.getValueSizeInBits();
17293   unsigned SrcSize = Extract.getValueSizeInBits();
17294   if (DestSize % SrcSize != 0 ||
17295       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17296     return SDValue();
17297
17298   // Create a shuffle mask that will combine the extracted element with zeros
17299   // and undefs.
17300   int ZextRatio = DestSize / SrcSize;
17301   int NumMaskElts = NumBVOps * ZextRatio;
17302   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17303   for (int i = 0; i != NumMaskElts; ++i) {
17304     if (i / ZextRatio == ZextElt) {
17305       // The low bits of the (potentially translated) extracted element map to
17306       // the source vector. The high bits map to zero. We will use a zero vector
17307       // as the 2nd source operand of the shuffle, so use the 1st element of
17308       // that vector (mask value is number-of-elements) for the high bits.
17309       if (i % ZextRatio == 0)
17310         ShufMask[i] = Extract.getConstantOperandVal(1);
17311       else
17312         ShufMask[i] = NumMaskElts;
17313     }
17314
17315     // Undef elements of the build vector remain undef because we initialize
17316     // the shuffle mask with -1.
17317   }
17318
17319   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17320   // bitcast (shuffle V, ZeroVec, VectorMask)
17321   SDLoc DL(BV);
17322   EVT VecVT = Extract.getOperand(0).getValueType();
17323   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17324   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17325   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
17326                                              ZeroVec, ShufMask, DAG);
17327   if (!Shuf)
17328     return SDValue();
17329   return DAG.getBitcast(VT, Shuf);
17330 }
17331
17332 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17333 // operations. If the types of the vectors we're extracting from allow it,
17334 // turn this into a vector_shuffle node.
17335 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17336   SDLoc DL(N);
17337   EVT VT = N->getValueType(0);
17338
17339   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17340   if (!isTypeLegal(VT))
17341     return SDValue();
17342
17343   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17344     return V;
17345
17346   // May only combine to shuffle after legalize if shuffle is legal.
17347   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17348     return SDValue();
17349
17350   bool UsesZeroVector = false;
17351   unsigned NumElems = N->getNumOperands();
17352
17353   // Record, for each element of the newly built vector, which input vector
17354   // that element comes from. -1 stands for undef, 0 for the zero vector,
17355   // and positive values for the input vectors.
17356   // VectorMask maps each element to its vector number, and VecIn maps vector
17357   // numbers to their initial SDValues.
17358
17359   SmallVector<int, 8> VectorMask(NumElems, -1);
17360   SmallVector<SDValue, 8> VecIn;
17361   VecIn.push_back(SDValue());
17362
17363   for (unsigned i = 0; i != NumElems; ++i) {
17364     SDValue Op = N->getOperand(i);
17365
17366     if (Op.isUndef())
17367       continue;
17368
17369     // See if we can use a blend with a zero vector.
17370     // TODO: Should we generalize this to a blend with an arbitrary constant
17371     // vector?
17372     if (isNullConstant(Op) || isNullFPConstant(Op)) {
17373       UsesZeroVector = true;
17374       VectorMask[i] = 0;
17375       continue;
17376     }
17377
17378     // Not an undef or zero. If the input is something other than an
17379     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17380     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17381         !isa<ConstantSDNode>(Op.getOperand(1)))
17382       return SDValue();
17383     SDValue ExtractedFromVec = Op.getOperand(0);
17384
17385     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17386     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17387       return SDValue();
17388
17389     // All inputs must have the same element type as the output.
17390     if (VT.getVectorElementType() !=
17391         ExtractedFromVec.getValueType().getVectorElementType())
17392       return SDValue();
17393
17394     // Have we seen this input vector before?
17395     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17396     // a map back from SDValues to numbers isn't worth it.
17397     unsigned Idx = std::distance(
17398         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17399     if (Idx == VecIn.size())
17400       VecIn.push_back(ExtractedFromVec);
17401
17402     VectorMask[i] = Idx;
17403   }
17404
17405   // If we didn't find at least one input vector, bail out.
17406   if (VecIn.size() < 2)
17407     return SDValue();
17408
17409   // If all the Operands of BUILD_VECTOR extract from same
17410   // vector, then split the vector efficiently based on the maximum
17411   // vector access index and adjust the VectorMask and
17412   // VecIn accordingly.
17413   bool DidSplitVec = false;
17414   if (VecIn.size() == 2) {
17415     unsigned MaxIndex = 0;
17416     unsigned NearestPow2 = 0;
17417     SDValue Vec = VecIn.back();
17418     EVT InVT = Vec.getValueType();
17419     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17420     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17421
17422     for (unsigned i = 0; i < NumElems; i++) {
17423       if (VectorMask[i] <= 0)
17424         continue;
17425       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17426       IndexVec[i] = Index;
17427       MaxIndex = std::max(MaxIndex, Index);
17428     }
17429
17430     NearestPow2 = PowerOf2Ceil(MaxIndex);
17431     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17432         NumElems * 2 < NearestPow2) {
17433       unsigned SplitSize = NearestPow2 / 2;
17434       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17435                                      InVT.getVectorElementType(), SplitSize);
17436       if (TLI.isTypeLegal(SplitVT)) {
17437         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17438                                      DAG.getConstant(SplitSize, DL, IdxTy));
17439         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17440                                      DAG.getConstant(0, DL, IdxTy));
17441         VecIn.pop_back();
17442         VecIn.push_back(VecIn1);
17443         VecIn.push_back(VecIn2);
17444         DidSplitVec = true;
17445
17446         for (unsigned i = 0; i < NumElems; i++) {
17447           if (VectorMask[i] <= 0)
17448             continue;
17449           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17450         }
17451       }
17452     }
17453   }
17454
17455   // TODO: We want to sort the vectors by descending length, so that adjacent
17456   // pairs have similar length, and the longer vector is always first in the
17457   // pair.
17458
17459   // TODO: Should this fire if some of the input vectors has illegal type (like
17460   // it does now), or should we let legalization run its course first?
17461
17462   // Shuffle phase:
17463   // Take pairs of vectors, and shuffle them so that the result has elements
17464   // from these vectors in the correct places.
17465   // For example, given:
17466   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17467   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17468   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17469   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17470   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17471   // We will generate:
17472   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17473   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17474   SmallVector<SDValue, 4> Shuffles;
17475   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17476     unsigned LeftIdx = 2 * In + 1;
17477     SDValue VecLeft = VecIn[LeftIdx];
17478     SDValue VecRight =
17479         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17480
17481     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17482                                                 VecRight, LeftIdx, DidSplitVec))
17483       Shuffles.push_back(Shuffle);
17484     else
17485       return SDValue();
17486   }
17487
17488   // If we need the zero vector as an "ingredient" in the blend tree, add it
17489   // to the list of shuffles.
17490   if (UsesZeroVector)
17491     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17492                                       : DAG.getConstantFP(0.0, DL, VT));
17493
17494   // If we only have one shuffle, we're done.
17495   if (Shuffles.size() == 1)
17496     return Shuffles[0];
17497
17498   // Update the vector mask to point to the post-shuffle vectors.
17499   for (int &Vec : VectorMask)
17500     if (Vec == 0)
17501       Vec = Shuffles.size() - 1;
17502     else
17503       Vec = (Vec - 1) / 2;
17504
17505   // More than one shuffle. Generate a binary tree of blends, e.g. if from
17506   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17507   // generate:
17508   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17509   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17510   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17511   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17512   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17513   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17514   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17515
17516   // Make sure the initial size of the shuffle list is even.
17517   if (Shuffles.size() % 2)
17518     Shuffles.push_back(DAG.getUNDEF(VT));
17519
17520   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17521     if (CurSize % 2) {
17522       Shuffles[CurSize] = DAG.getUNDEF(VT);
17523       CurSize++;
17524     }
17525     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17526       int Left = 2 * In;
17527       int Right = 2 * In + 1;
17528       SmallVector<int, 8> Mask(NumElems, -1);
17529       for (unsigned i = 0; i != NumElems; ++i) {
17530         if (VectorMask[i] == Left) {
17531           Mask[i] = i;
17532           VectorMask[i] = In;
17533         } else if (VectorMask[i] == Right) {
17534           Mask[i] = i + NumElems;
17535           VectorMask[i] = In;
17536         }
17537       }
17538
17539       Shuffles[In] =
17540           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17541     }
17542   }
17543   return Shuffles[0];
17544 }
17545
17546 // Try to turn a build vector of zero extends of extract vector elts into a
17547 // a vector zero extend and possibly an extract subvector.
17548 // TODO: Support sign extend?
17549 // TODO: Allow undef elements?
17550 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17551   if (LegalOperations)
17552     return SDValue();
17553
17554   EVT VT = N->getValueType(0);
17555
17556   bool FoundZeroExtend = false;
17557   SDValue Op0 = N->getOperand(0);
17558   auto checkElem = [&](SDValue Op) -> int64_t {
17559     unsigned Opc = Op.getOpcode();
17560     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17561     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17562         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17563         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17564       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17565         return C->getZExtValue();
17566     return -1;
17567   };
17568
17569   // Make sure the first element matches
17570   // (zext (extract_vector_elt X, C))
17571   int64_t Offset = checkElem(Op0);
17572   if (Offset < 0)
17573     return SDValue();
17574
17575   unsigned NumElems = N->getNumOperands();
17576   SDValue In = Op0.getOperand(0).getOperand(0);
17577   EVT InSVT = In.getValueType().getScalarType();
17578   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17579
17580   // Don't create an illegal input type after type legalization.
17581   if (LegalTypes && !TLI.isTypeLegal(InVT))
17582     return SDValue();
17583
17584   // Ensure all the elements come from the same vector and are adjacent.
17585   for (unsigned i = 1; i != NumElems; ++i) {
17586     if ((Offset + i) != checkElem(N->getOperand(i)))
17587       return SDValue();
17588   }
17589
17590   SDLoc DL(N);
17591   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17592                    Op0.getOperand(0).getOperand(1));
17593   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17594                      VT, In);
17595 }
17596
17597 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17598   EVT VT = N->getValueType(0);
17599
17600   // A vector built entirely of undefs is undef.
17601   if (ISD::allOperandsUndef(N))
17602     return DAG.getUNDEF(VT);
17603
17604   // If this is a splat of a bitcast from another vector, change to a
17605   // concat_vector.
17606   // For example:
17607   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17608   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17609   //
17610   // If X is a build_vector itself, the concat can become a larger build_vector.
17611   // TODO: Maybe this is useful for non-splat too?
17612   if (!LegalOperations) {
17613     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17614       Splat = peekThroughBitcasts(Splat);
17615       EVT SrcVT = Splat.getValueType();
17616       if (SrcVT.isVector()) {
17617         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17618         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17619                                      SrcVT.getVectorElementType(), NumElts);
17620         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17621           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17622           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17623                                        NewVT, Ops);
17624           return DAG.getBitcast(VT, Concat);
17625         }
17626       }
17627     }
17628   }
17629
17630   // A splat of a single element is a SPLAT_VECTOR if supported on the target.
17631   if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
17632     if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17633       assert(!V.isUndef() && "Splat of undef should have been handled earlier");
17634       return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
17635     }
17636
17637   // Check if we can express BUILD VECTOR via subvector extract.
17638   if (!LegalTypes && (N->getNumOperands() > 1)) {
17639     SDValue Op0 = N->getOperand(0);
17640     auto checkElem = [&](SDValue Op) -> uint64_t {
17641       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17642           (Op0.getOperand(0) == Op.getOperand(0)))
17643         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17644           return CNode->getZExtValue();
17645       return -1;
17646     };
17647
17648     int Offset = checkElem(Op0);
17649     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17650       if (Offset + i != checkElem(N->getOperand(i))) {
17651         Offset = -1;
17652         break;
17653       }
17654     }
17655
17656     if ((Offset == 0) &&
17657         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17658       return Op0.getOperand(0);
17659     if ((Offset != -1) &&
17660         ((Offset % N->getValueType(0).getVectorNumElements()) ==
17661          0)) // IDX must be multiple of output size.
17662       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17663                          Op0.getOperand(0), Op0.getOperand(1));
17664   }
17665
17666   if (SDValue V = convertBuildVecZextToZext(N))
17667     return V;
17668
17669   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17670     return V;
17671
17672   if (SDValue V = reduceBuildVecToShuffle(N))
17673     return V;
17674
17675   return SDValue();
17676 }
17677
17678 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17679   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17680   EVT OpVT = N->getOperand(0).getValueType();
17681
17682   // If the operands are legal vectors, leave them alone.
17683   if (TLI.isTypeLegal(OpVT))
17684     return SDValue();
17685
17686   SDLoc DL(N);
17687   EVT VT = N->getValueType(0);
17688   SmallVector<SDValue, 8> Ops;
17689
17690   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17691   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17692
17693   // Keep track of what we encounter.
17694   bool AnyInteger = false;
17695   bool AnyFP = false;
17696   for (const SDValue &Op : N->ops()) {
17697     if (ISD::BITCAST == Op.getOpcode() &&
17698         !Op.getOperand(0).getValueType().isVector())
17699       Ops.push_back(Op.getOperand(0));
17700     else if (ISD::UNDEF == Op.getOpcode())
17701       Ops.push_back(ScalarUndef);
17702     else
17703       return SDValue();
17704
17705     // Note whether we encounter an integer or floating point scalar.
17706     // If it's neither, bail out, it could be something weird like x86mmx.
17707     EVT LastOpVT = Ops.back().getValueType();
17708     if (LastOpVT.isFloatingPoint())
17709       AnyFP = true;
17710     else if (LastOpVT.isInteger())
17711       AnyInteger = true;
17712     else
17713       return SDValue();
17714   }
17715
17716   // If any of the operands is a floating point scalar bitcast to a vector,
17717   // use floating point types throughout, and bitcast everything.
17718   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17719   if (AnyFP) {
17720     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17721     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17722     if (AnyInteger) {
17723       for (SDValue &Op : Ops) {
17724         if (Op.getValueType() == SVT)
17725           continue;
17726         if (Op.isUndef())
17727           Op = ScalarUndef;
17728         else
17729           Op = DAG.getBitcast(SVT, Op);
17730       }
17731     }
17732   }
17733
17734   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17735                                VT.getSizeInBits() / SVT.getSizeInBits());
17736   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17737 }
17738
17739 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17740 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17741 // most two distinct vectors the same size as the result, attempt to turn this
17742 // into a legal shuffle.
17743 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17744   EVT VT = N->getValueType(0);
17745   EVT OpVT = N->getOperand(0).getValueType();
17746   int NumElts = VT.getVectorNumElements();
17747   int NumOpElts = OpVT.getVectorNumElements();
17748
17749   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17750   SmallVector<int, 8> Mask;
17751
17752   for (SDValue Op : N->ops()) {
17753     Op = peekThroughBitcasts(Op);
17754
17755     // UNDEF nodes convert to UNDEF shuffle mask values.
17756     if (Op.isUndef()) {
17757       Mask.append((unsigned)NumOpElts, -1);
17758       continue;
17759     }
17760
17761     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17762       return SDValue();
17763
17764     // What vector are we extracting the subvector from and at what index?
17765     SDValue ExtVec = Op.getOperand(0);
17766
17767     // We want the EVT of the original extraction to correctly scale the
17768     // extraction index.
17769     EVT ExtVT = ExtVec.getValueType();
17770     ExtVec = peekThroughBitcasts(ExtVec);
17771
17772     // UNDEF nodes convert to UNDEF shuffle mask values.
17773     if (ExtVec.isUndef()) {
17774       Mask.append((unsigned)NumOpElts, -1);
17775       continue;
17776     }
17777
17778     if (!isa<ConstantSDNode>(Op.getOperand(1)))
17779       return SDValue();
17780     int ExtIdx = Op.getConstantOperandVal(1);
17781
17782     // Ensure that we are extracting a subvector from a vector the same
17783     // size as the result.
17784     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17785       return SDValue();
17786
17787     // Scale the subvector index to account for any bitcast.
17788     int NumExtElts = ExtVT.getVectorNumElements();
17789     if (0 == (NumExtElts % NumElts))
17790       ExtIdx /= (NumExtElts / NumElts);
17791     else if (0 == (NumElts % NumExtElts))
17792       ExtIdx *= (NumElts / NumExtElts);
17793     else
17794       return SDValue();
17795
17796     // At most we can reference 2 inputs in the final shuffle.
17797     if (SV0.isUndef() || SV0 == ExtVec) {
17798       SV0 = ExtVec;
17799       for (int i = 0; i != NumOpElts; ++i)
17800         Mask.push_back(i + ExtIdx);
17801     } else if (SV1.isUndef() || SV1 == ExtVec) {
17802       SV1 = ExtVec;
17803       for (int i = 0; i != NumOpElts; ++i)
17804         Mask.push_back(i + ExtIdx + NumElts);
17805     } else {
17806       return SDValue();
17807     }
17808   }
17809
17810   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17811   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17812                                      DAG.getBitcast(VT, SV1), Mask, DAG);
17813 }
17814
17815 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17816   // If we only have one input vector, we don't need to do any concatenation.
17817   if (N->getNumOperands() == 1)
17818     return N->getOperand(0);
17819
17820   // Check if all of the operands are undefs.
17821   EVT VT = N->getValueType(0);
17822   if (ISD::allOperandsUndef(N))
17823     return DAG.getUNDEF(VT);
17824
17825   // Optimize concat_vectors where all but the first of the vectors are undef.
17826   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17827         return Op.isUndef();
17828       })) {
17829     SDValue In = N->getOperand(0);
17830     assert(In.getValueType().isVector() && "Must concat vectors");
17831
17832     // If the input is a concat_vectors, just make a larger concat by padding
17833     // with smaller undefs.
17834     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
17835       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
17836       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
17837       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
17838       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17839     }
17840
17841     SDValue Scalar = peekThroughOneUseBitcasts(In);
17842
17843     // concat_vectors(scalar_to_vector(scalar), undef) ->
17844     //     scalar_to_vector(scalar)
17845     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17846          Scalar.hasOneUse()) {
17847       EVT SVT = Scalar.getValueType().getVectorElementType();
17848       if (SVT == Scalar.getOperand(0).getValueType())
17849         Scalar = Scalar.getOperand(0);
17850     }
17851
17852     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17853     if (!Scalar.getValueType().isVector()) {
17854       // If the bitcast type isn't legal, it might be a trunc of a legal type;
17855       // look through the trunc so we can still do the transform:
17856       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17857       if (Scalar->getOpcode() == ISD::TRUNCATE &&
17858           !TLI.isTypeLegal(Scalar.getValueType()) &&
17859           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17860         Scalar = Scalar->getOperand(0);
17861
17862       EVT SclTy = Scalar.getValueType();
17863
17864       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17865         return SDValue();
17866
17867       // Bail out if the vector size is not a multiple of the scalar size.
17868       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17869         return SDValue();
17870
17871       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17872       if (VNTNumElms < 2)
17873         return SDValue();
17874
17875       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17876       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17877         return SDValue();
17878
17879       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17880       return DAG.getBitcast(VT, Res);
17881     }
17882   }
17883
17884   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17885   // We have already tested above for an UNDEF only concatenation.
17886   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17887   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17888   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17889     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17890   };
17891   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17892     SmallVector<SDValue, 8> Opnds;
17893     EVT SVT = VT.getScalarType();
17894
17895     EVT MinVT = SVT;
17896     if (!SVT.isFloatingPoint()) {
17897       // If BUILD_VECTOR are from built from integer, they may have different
17898       // operand types. Get the smallest type and truncate all operands to it.
17899       bool FoundMinVT = false;
17900       for (const SDValue &Op : N->ops())
17901         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17902           EVT OpSVT = Op.getOperand(0).getValueType();
17903           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
17904           FoundMinVT = true;
17905         }
17906       assert(FoundMinVT && "Concat vector type mismatch");
17907     }
17908
17909     for (const SDValue &Op : N->ops()) {
17910       EVT OpVT = Op.getValueType();
17911       unsigned NumElts = OpVT.getVectorNumElements();
17912
17913       if (ISD::UNDEF == Op.getOpcode())
17914         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
17915
17916       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
17917         if (SVT.isFloatingPoint()) {
17918           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
17919           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
17920         } else {
17921           for (unsigned i = 0; i != NumElts; ++i)
17922             Opnds.push_back(
17923                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
17924         }
17925       }
17926     }
17927
17928     assert(VT.getVectorNumElements() == Opnds.size() &&
17929            "Concat vector type mismatch");
17930     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
17931   }
17932
17933   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
17934   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
17935     return V;
17936
17937   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
17938   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
17939     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
17940       return V;
17941
17942   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
17943   // nodes often generate nop CONCAT_VECTOR nodes.
17944   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
17945   // place the incoming vectors at the exact same location.
17946   SDValue SingleSource = SDValue();
17947   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
17948
17949   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17950     SDValue Op = N->getOperand(i);
17951
17952     if (Op.isUndef())
17953       continue;
17954
17955     // Check if this is the identity extract:
17956     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17957       return SDValue();
17958
17959     // Find the single incoming vector for the extract_subvector.
17960     if (SingleSource.getNode()) {
17961       if (Op.getOperand(0) != SingleSource)
17962         return SDValue();
17963     } else {
17964       SingleSource = Op.getOperand(0);
17965
17966       // Check the source type is the same as the type of the result.
17967       // If not, this concat may extend the vector, so we can not
17968       // optimize it away.
17969       if (SingleSource.getValueType() != N->getValueType(0))
17970         return SDValue();
17971     }
17972
17973     auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
17974     // The extract index must be constant.
17975     if (!CS)
17976       return SDValue();
17977
17978     // Check that we are reading from the identity index.
17979     unsigned IdentityIndex = i * PartNumElem;
17980     if (CS->getAPIntValue() != IdentityIndex)
17981       return SDValue();
17982   }
17983
17984   if (SingleSource.getNode())
17985     return SingleSource;
17986
17987   return SDValue();
17988 }
17989
17990 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
17991 // if the subvector can be sourced for free.
17992 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
17993   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
17994       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
17995     return V.getOperand(1);
17996   }
17997   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
17998   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
17999       V.getOperand(0).getValueType() == SubVT &&
18000       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
18001     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
18002     return V.getOperand(SubIdx);
18003   }
18004   return SDValue();
18005 }
18006
18007 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
18008                                               SelectionDAG &DAG) {
18009   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18010   SDValue BinOp = Extract->getOperand(0);
18011   unsigned BinOpcode = BinOp.getOpcode();
18012   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18013     return SDValue();
18014
18015   EVT VecVT = BinOp.getValueType();
18016   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18017   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
18018     return SDValue();
18019
18020   SDValue Index = Extract->getOperand(1);
18021   EVT SubVT = Extract->getValueType(0);
18022   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
18023     return SDValue();
18024
18025   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
18026   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
18027
18028   // TODO: We could handle the case where only 1 operand is being inserted by
18029   //       creating an extract of the other operand, but that requires checking
18030   //       number of uses and/or costs.
18031   if (!Sub0 || !Sub1)
18032     return SDValue();
18033
18034   // We are inserting both operands of the wide binop only to extract back
18035   // to the narrow vector size. Eliminate all of the insert/extract:
18036   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18037   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
18038                      BinOp->getFlags());
18039 }
18040
18041 /// If we are extracting a subvector produced by a wide binary operator try
18042 /// to use a narrow binary operator and/or avoid concatenation and extraction.
18043 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
18044   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
18045   // some of these bailouts with other transforms.
18046
18047   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
18048     return V;
18049
18050   // The extract index must be a constant, so we can map it to a concat operand.
18051   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18052   if (!ExtractIndexC)
18053     return SDValue();
18054
18055   // We are looking for an optionally bitcasted wide vector binary operator
18056   // feeding an extract subvector.
18057   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18058   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
18059   unsigned BOpcode = BinOp.getOpcode();
18060   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
18061     return SDValue();
18062
18063   // The binop must be a vector type, so we can extract some fraction of it.
18064   EVT WideBVT = BinOp.getValueType();
18065   if (!WideBVT.isVector())
18066     return SDValue();
18067
18068   EVT VT = Extract->getValueType(0);
18069   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
18070   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
18071          "Extract index is not a multiple of the vector length.");
18072
18073   // Bail out if this is not a proper multiple width extraction.
18074   unsigned WideWidth = WideBVT.getSizeInBits();
18075   unsigned NarrowWidth = VT.getSizeInBits();
18076   if (WideWidth % NarrowWidth != 0)
18077     return SDValue();
18078
18079   // Bail out if we are extracting a fraction of a single operation. This can
18080   // occur because we potentially looked through a bitcast of the binop.
18081   unsigned NarrowingRatio = WideWidth / NarrowWidth;
18082   unsigned WideNumElts = WideBVT.getVectorNumElements();
18083   if (WideNumElts % NarrowingRatio != 0)
18084     return SDValue();
18085
18086   // Bail out if the target does not support a narrower version of the binop.
18087   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
18088                                    WideNumElts / NarrowingRatio);
18089   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
18090     return SDValue();
18091
18092   // If extraction is cheap, we don't need to look at the binop operands
18093   // for concat ops. The narrow binop alone makes this transform profitable.
18094   // We can't just reuse the original extract index operand because we may have
18095   // bitcasted.
18096   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
18097   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
18098   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
18099   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
18100       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
18101     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
18102     SDLoc DL(Extract);
18103     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18104     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18105                             BinOp.getOperand(0), NewExtIndex);
18106     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18107                             BinOp.getOperand(1), NewExtIndex);
18108     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
18109                                       BinOp.getNode()->getFlags());
18110     return DAG.getBitcast(VT, NarrowBinOp);
18111   }
18112
18113   // Only handle the case where we are doubling and then halving. A larger ratio
18114   // may require more than two narrow binops to replace the wide binop.
18115   if (NarrowingRatio != 2)
18116     return SDValue();
18117
18118   // TODO: The motivating case for this transform is an x86 AVX1 target. That
18119   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
18120   // flavors, but no other 256-bit integer support. This could be extended to
18121   // handle any binop, but that may require fixing/adding other folds to avoid
18122   // codegen regressions.
18123   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
18124     return SDValue();
18125
18126   // We need at least one concatenation operation of a binop operand to make
18127   // this transform worthwhile. The concat must double the input vector sizes.
18128   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18129     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18130       return V.getOperand(ConcatOpNum);
18131     return SDValue();
18132   };
18133   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18134   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18135
18136   if (SubVecL || SubVecR) {
18137     // If a binop operand was not the result of a concat, we must extract a
18138     // half-sized operand for our new narrow binop:
18139     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18140     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18141     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18142     SDLoc DL(Extract);
18143     SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18144     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18145                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18146                                       BinOp.getOperand(0), IndexC);
18147
18148     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18149                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18150                                       BinOp.getOperand(1), IndexC);
18151
18152     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18153     return DAG.getBitcast(VT, NarrowBinOp);
18154   }
18155
18156   return SDValue();
18157 }
18158
18159 /// If we are extracting a subvector from a wide vector load, convert to a
18160 /// narrow load to eliminate the extraction:
18161 /// (extract_subvector (load wide vector)) --> (load narrow vector)
18162 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18163   // TODO: Add support for big-endian. The offset calculation must be adjusted.
18164   if (DAG.getDataLayout().isBigEndian())
18165     return SDValue();
18166
18167   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18168   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18169   if (!Ld || Ld->getExtensionType() || !Ld->isSimple() ||
18170       !ExtIdx)
18171     return SDValue();
18172
18173   // Allow targets to opt-out.
18174   EVT VT = Extract->getValueType(0);
18175   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18176   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18177     return SDValue();
18178
18179   // The narrow load will be offset from the base address of the old load if
18180   // we are extracting from something besides index 0 (little-endian).
18181   SDLoc DL(Extract);
18182   SDValue BaseAddr = Ld->getOperand(1);
18183   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18184
18185   // TODO: Use "BaseIndexOffset" to make this more effective.
18186   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18187   MachineFunction &MF = DAG.getMachineFunction();
18188   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18189                                                    VT.getStoreSize());
18190   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18191   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18192   return NewLd;
18193 }
18194
18195 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18196   EVT NVT = N->getValueType(0);
18197   SDValue V = N->getOperand(0);
18198
18199   // Extract from UNDEF is UNDEF.
18200   if (V.isUndef())
18201     return DAG.getUNDEF(NVT);
18202
18203   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18204     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18205       return NarrowLoad;
18206
18207   // Combine an extract of an extract into a single extract_subvector.
18208   // ext (ext X, C), 0 --> ext X, C
18209   SDValue Index = N->getOperand(1);
18210   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18211       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18212     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18213                                     V.getConstantOperandVal(1)) &&
18214         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18215       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18216                          V.getOperand(1));
18217     }
18218   }
18219
18220   // Try to move vector bitcast after extract_subv by scaling extraction index:
18221   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18222   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18223       V.getOperand(0).getValueType().isVector()) {
18224     SDValue SrcOp = V.getOperand(0);
18225     EVT SrcVT = SrcOp.getValueType();
18226     unsigned SrcNumElts = SrcVT.getVectorNumElements();
18227     unsigned DestNumElts = V.getValueType().getVectorNumElements();
18228     if ((SrcNumElts % DestNumElts) == 0) {
18229       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18230       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18231       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18232                                       NewExtNumElts);
18233       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18234         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18235         SDLoc DL(N);
18236         SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
18237         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18238                                          V.getOperand(0), NewIndex);
18239         return DAG.getBitcast(NVT, NewExtract);
18240       }
18241     }
18242     // TODO - handle (DestNumElts % SrcNumElts) == 0
18243   }
18244
18245   // Combine:
18246   //    (extract_subvec (concat V1, V2, ...), i)
18247   // Into:
18248   //    Vi if possible
18249   // Only operand 0 is checked as 'concat' assumes all inputs of the same
18250   // type.
18251   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
18252       V.getOperand(0).getValueType() == NVT) {
18253     unsigned Idx = N->getConstantOperandVal(1);
18254     unsigned NumElems = NVT.getVectorNumElements();
18255     assert((Idx % NumElems) == 0 &&
18256            "IDX in concat is not a multiple of the result vector length.");
18257     return V->getOperand(Idx / NumElems);
18258   }
18259
18260   V = peekThroughBitcasts(V);
18261
18262   // If the input is a build vector. Try to make a smaller build vector.
18263   if (V.getOpcode() == ISD::BUILD_VECTOR) {
18264     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18265       EVT InVT = V.getValueType();
18266       unsigned ExtractSize = NVT.getSizeInBits();
18267       unsigned EltSize = InVT.getScalarSizeInBits();
18268       // Only do this if we won't split any elements.
18269       if (ExtractSize % EltSize == 0) {
18270         unsigned NumElems = ExtractSize / EltSize;
18271         EVT EltVT = InVT.getVectorElementType();
18272         EVT ExtractVT = NumElems == 1 ? EltVT
18273                                       : EVT::getVectorVT(*DAG.getContext(),
18274                                                          EltVT, NumElems);
18275         if ((Level < AfterLegalizeDAG ||
18276              (NumElems == 1 ||
18277               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18278             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18279           unsigned IdxVal = IdxC->getZExtValue();
18280           IdxVal *= NVT.getScalarSizeInBits();
18281           IdxVal /= EltSize;
18282
18283           if (NumElems == 1) {
18284             SDValue Src = V->getOperand(IdxVal);
18285             if (EltVT != Src.getValueType())
18286               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18287             return DAG.getBitcast(NVT, Src);
18288           }
18289
18290           // Extract the pieces from the original build_vector.
18291           SDValue BuildVec = DAG.getBuildVector(
18292               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18293           return DAG.getBitcast(NVT, BuildVec);
18294         }
18295       }
18296     }
18297   }
18298
18299   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18300     // Handle only simple case where vector being inserted and vector
18301     // being extracted are of same size.
18302     EVT SmallVT = V.getOperand(1).getValueType();
18303     if (!NVT.bitsEq(SmallVT))
18304       return SDValue();
18305
18306     // Only handle cases where both indexes are constants.
18307     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18308     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18309     if (InsIdx && ExtIdx) {
18310       // Combine:
18311       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18312       // Into:
18313       //    indices are equal or bit offsets are equal => V1
18314       //    otherwise => (extract_subvec V1, ExtIdx)
18315       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18316           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18317         return DAG.getBitcast(NVT, V.getOperand(1));
18318       return DAG.getNode(
18319           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18320           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18321           Index);
18322     }
18323   }
18324
18325   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18326     return NarrowBOp;
18327
18328   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18329     return SDValue(N, 0);
18330
18331   return SDValue();
18332 }
18333
18334 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18335 /// followed by concatenation. Narrow vector ops may have better performance
18336 /// than wide ops, and this can unlock further narrowing of other vector ops.
18337 /// Targets can invert this transform later if it is not profitable.
18338 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18339                                          SelectionDAG &DAG) {
18340   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18341   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18342       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18343       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18344     return SDValue();
18345
18346   // Split the wide shuffle mask into halves. Any mask element that is accessing
18347   // operand 1 is offset down to account for narrowing of the vectors.
18348   ArrayRef<int> Mask = Shuf->getMask();
18349   EVT VT = Shuf->getValueType(0);
18350   unsigned NumElts = VT.getVectorNumElements();
18351   unsigned HalfNumElts = NumElts / 2;
18352   SmallVector<int, 16> Mask0(HalfNumElts, -1);
18353   SmallVector<int, 16> Mask1(HalfNumElts, -1);
18354   for (unsigned i = 0; i != NumElts; ++i) {
18355     if (Mask[i] == -1)
18356       continue;
18357     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18358     if (i < HalfNumElts)
18359       Mask0[i] = M;
18360     else
18361       Mask1[i - HalfNumElts] = M;
18362   }
18363
18364   // Ask the target if this is a valid transform.
18365   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18366   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18367                                 HalfNumElts);
18368   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18369       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18370     return SDValue();
18371
18372   // shuffle (concat X, undef), (concat Y, undef), Mask -->
18373   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18374   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18375   SDLoc DL(Shuf);
18376   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18377   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18378   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18379 }
18380
18381 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18382 // or turn a shuffle of a single concat into simpler shuffle then concat.
18383 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
18384   EVT VT = N->getValueType(0);
18385   unsigned NumElts = VT.getVectorNumElements();
18386
18387   SDValue N0 = N->getOperand(0);
18388   SDValue N1 = N->getOperand(1);
18389   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18390   ArrayRef<int> Mask = SVN->getMask();
18391
18392   SmallVector<SDValue, 4> Ops;
18393   EVT ConcatVT = N0.getOperand(0).getValueType();
18394   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
18395   unsigned NumConcats = NumElts / NumElemsPerConcat;
18396
18397   auto IsUndefMaskElt = [](int i) { return i == -1; };
18398
18399   // Special case: shuffle(concat(A,B)) can be more efficiently represented
18400   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
18401   // half vector elements.
18402   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
18403       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
18404                    IsUndefMaskElt)) {
18405     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
18406                               N0.getOperand(1),
18407                               Mask.slice(0, NumElemsPerConcat));
18408     N1 = DAG.getUNDEF(ConcatVT);
18409     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
18410   }
18411
18412   // Look at every vector that's inserted. We're looking for exact
18413   // subvector-sized copies from a concatenated vector
18414   for (unsigned I = 0; I != NumConcats; ++I) {
18415     unsigned Begin = I * NumElemsPerConcat;
18416     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
18417
18418     // Make sure we're dealing with a copy.
18419     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
18420       Ops.push_back(DAG.getUNDEF(ConcatVT));
18421       continue;
18422     }
18423
18424     int OpIdx = -1;
18425     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
18426       if (IsUndefMaskElt(SubMask[i]))
18427         continue;
18428       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
18429         return SDValue();
18430       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
18431       if (0 <= OpIdx && EltOpIdx != OpIdx)
18432         return SDValue();
18433       OpIdx = EltOpIdx;
18434     }
18435     assert(0 <= OpIdx && "Unknown concat_vectors op");
18436
18437     if (OpIdx < (int)N0.getNumOperands())
18438       Ops.push_back(N0.getOperand(OpIdx));
18439     else
18440       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
18441   }
18442
18443   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18444 }
18445
18446 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18447 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18448 //
18449 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18450 // a simplification in some sense, but it isn't appropriate in general: some
18451 // BUILD_VECTORs are substantially cheaper than others. The general case
18452 // of a BUILD_VECTOR requires inserting each element individually (or
18453 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18454 // all constants is a single constant pool load.  A BUILD_VECTOR where each
18455 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
18456 // are undef lowers to a small number of element insertions.
18457 //
18458 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18459 // We don't fold shuffles where one side is a non-zero constant, and we don't
18460 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18461 // non-constant operands. This seems to work out reasonably well in practice.
18462 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
18463                                        SelectionDAG &DAG,
18464                                        const TargetLowering &TLI) {
18465   EVT VT = SVN->getValueType(0);
18466   unsigned NumElts = VT.getVectorNumElements();
18467   SDValue N0 = SVN->getOperand(0);
18468   SDValue N1 = SVN->getOperand(1);
18469
18470   if (!N0->hasOneUse())
18471     return SDValue();
18472
18473   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
18474   // discussed above.
18475   if (!N1.isUndef()) {
18476     if (!N1->hasOneUse())
18477       return SDValue();
18478
18479     bool N0AnyConst = isAnyConstantBuildVector(N0);
18480     bool N1AnyConst = isAnyConstantBuildVector(N1);
18481     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
18482       return SDValue();
18483     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
18484       return SDValue();
18485   }
18486
18487   // If both inputs are splats of the same value then we can safely merge this
18488   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
18489   bool IsSplat = false;
18490   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
18491   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
18492   if (BV0 && BV1)
18493     if (SDValue Splat0 = BV0->getSplatValue())
18494       IsSplat = (Splat0 == BV1->getSplatValue());
18495
18496   SmallVector<SDValue, 8> Ops;
18497   SmallSet<SDValue, 16> DuplicateOps;
18498   for (int M : SVN->getMask()) {
18499     SDValue Op = DAG.getUNDEF(VT.getScalarType());
18500     if (M >= 0) {
18501       int Idx = M < (int)NumElts ? M : M - NumElts;
18502       SDValue &S = (M < (int)NumElts ? N0 : N1);
18503       if (S.getOpcode() == ISD::BUILD_VECTOR) {
18504         Op = S.getOperand(Idx);
18505       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18506         SDValue Op0 = S.getOperand(0);
18507         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
18508       } else {
18509         // Operand can't be combined - bail out.
18510         return SDValue();
18511       }
18512     }
18513
18514     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18515     // generating a splat; semantically, this is fine, but it's likely to
18516     // generate low-quality code if the target can't reconstruct an appropriate
18517     // shuffle.
18518     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18519       if (!IsSplat && !DuplicateOps.insert(Op).second)
18520         return SDValue();
18521
18522     Ops.push_back(Op);
18523   }
18524
18525   // BUILD_VECTOR requires all inputs to be of the same type, find the
18526   // maximum type and extend them all.
18527   EVT SVT = VT.getScalarType();
18528   if (SVT.isInteger())
18529     for (SDValue &Op : Ops)
18530       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18531   if (SVT != VT.getScalarType())
18532     for (SDValue &Op : Ops)
18533       Op = TLI.isZExtFree(Op.getValueType(), SVT)
18534                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18535                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18536   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18537 }
18538
18539 // Match shuffles that can be converted to any_vector_extend_in_reg.
18540 // This is often generated during legalization.
18541 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18542 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18543 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
18544                                             SelectionDAG &DAG,
18545                                             const TargetLowering &TLI,
18546                                             bool LegalOperations) {
18547   EVT VT = SVN->getValueType(0);
18548   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18549
18550   // TODO Add support for big-endian when we have a test case.
18551   if (!VT.isInteger() || IsBigEndian)
18552     return SDValue();
18553
18554   unsigned NumElts = VT.getVectorNumElements();
18555   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18556   ArrayRef<int> Mask = SVN->getMask();
18557   SDValue N0 = SVN->getOperand(0);
18558
18559   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18560   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18561     for (unsigned i = 0; i != NumElts; ++i) {
18562       if (Mask[i] < 0)
18563         continue;
18564       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18565         continue;
18566       return false;
18567     }
18568     return true;
18569   };
18570
18571   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
18572   // power-of-2 extensions as they are the most likely.
18573   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
18574     // Check for non power of 2 vector sizes
18575     if (NumElts % Scale != 0)
18576       continue;
18577     if (!isAnyExtend(Scale))
18578       continue;
18579
18580     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18581     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18582     // Never create an illegal type. Only create unsupported operations if we
18583     // are pre-legalization.
18584     if (TLI.isTypeLegal(OutVT))
18585       if (!LegalOperations ||
18586           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
18587         return DAG.getBitcast(VT,
18588                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
18589                                           SDLoc(SVN), OutVT, N0));
18590   }
18591
18592   return SDValue();
18593 }
18594
18595 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18596 // each source element of a large type into the lowest elements of a smaller
18597 // destination type. This is often generated during legalization.
18598 // If the source node itself was a '*_extend_vector_inreg' node then we should
18599 // then be able to remove it.
18600 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
18601                                         SelectionDAG &DAG) {
18602   EVT VT = SVN->getValueType(0);
18603   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18604
18605   // TODO Add support for big-endian when we have a test case.
18606   if (!VT.isInteger() || IsBigEndian)
18607     return SDValue();
18608
18609   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
18610
18611   unsigned Opcode = N0.getOpcode();
18612   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18613       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
18614       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
18615     return SDValue();
18616
18617   SDValue N00 = N0.getOperand(0);
18618   ArrayRef<int> Mask = SVN->getMask();
18619   unsigned NumElts = VT.getVectorNumElements();
18620   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18621   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18622   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18623
18624   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18625     return SDValue();
18626   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18627
18628   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
18629   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18630   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18631   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18632     for (unsigned i = 0; i != NumElts; ++i) {
18633       if (Mask[i] < 0)
18634         continue;
18635       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18636         continue;
18637       return false;
18638     }
18639     return true;
18640   };
18641
18642   // At the moment we just handle the case where we've truncated back to the
18643   // same size as before the extension.
18644   // TODO: handle more extension/truncation cases as cases arise.
18645   if (EltSizeInBits != ExtSrcSizeInBits)
18646     return SDValue();
18647
18648   // We can remove *extend_vector_inreg only if the truncation happens at
18649   // the same scale as the extension.
18650   if (isTruncate(ExtScale))
18651     return DAG.getBitcast(VT, N00);
18652
18653   return SDValue();
18654 }
18655
18656 // Combine shuffles of splat-shuffles of the form:
18657 // shuffle (shuffle V, undef, splat-mask), undef, M
18658 // If splat-mask contains undef elements, we need to be careful about
18659 // introducing undef's in the folded mask which are not the result of composing
18660 // the masks of the shuffles.
18661 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18662                                         SelectionDAG &DAG) {
18663   if (!Shuf->getOperand(1).isUndef())
18664     return SDValue();
18665   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18666   if (!Splat || !Splat->isSplat())
18667     return SDValue();
18668
18669   ArrayRef<int> ShufMask = Shuf->getMask();
18670   ArrayRef<int> SplatMask = Splat->getMask();
18671   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18672
18673   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18674   // every undef mask element in the splat-shuffle has a corresponding undef
18675   // element in the user-shuffle's mask or if the composition of mask elements
18676   // would result in undef.
18677   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18678   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18679   //   In this case it is not legal to simplify to the splat-shuffle because we
18680   //   may be exposing the users of the shuffle an undef element at index 1
18681   //   which was not there before the combine.
18682   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18683   //   In this case the composition of masks yields SplatMask, so it's ok to
18684   //   simplify to the splat-shuffle.
18685   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18686   //   In this case the composed mask includes all undef elements of SplatMask
18687   //   and in addition sets element zero to undef. It is safe to simplify to
18688   //   the splat-shuffle.
18689   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18690                                        ArrayRef<int> SplatMask) {
18691     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18692       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18693           SplatMask[UserMask[i]] != -1)
18694         return false;
18695     return true;
18696   };
18697   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18698     return Shuf->getOperand(0);
18699
18700   // Create a new shuffle with a mask that is composed of the two shuffles'
18701   // masks.
18702   SmallVector<int, 32> NewMask;
18703   for (int Idx : ShufMask)
18704     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18705
18706   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18707                               Splat->getOperand(0), Splat->getOperand(1),
18708                               NewMask);
18709 }
18710
18711 /// If the shuffle mask is taking exactly one element from the first vector
18712 /// operand and passing through all other elements from the second vector
18713 /// operand, return the index of the mask element that is choosing an element
18714 /// from the first operand. Otherwise, return -1.
18715 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18716   int MaskSize = Mask.size();
18717   int EltFromOp0 = -1;
18718   // TODO: This does not match if there are undef elements in the shuffle mask.
18719   // Should we ignore undefs in the shuffle mask instead? The trade-off is
18720   // removing an instruction (a shuffle), but losing the knowledge that some
18721   // vector lanes are not needed.
18722   for (int i = 0; i != MaskSize; ++i) {
18723     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18724       // We're looking for a shuffle of exactly one element from operand 0.
18725       if (EltFromOp0 != -1)
18726         return -1;
18727       EltFromOp0 = i;
18728     } else if (Mask[i] != i + MaskSize) {
18729       // Nothing from operand 1 can change lanes.
18730       return -1;
18731     }
18732   }
18733   return EltFromOp0;
18734 }
18735
18736 /// If a shuffle inserts exactly one element from a source vector operand into
18737 /// another vector operand and we can access the specified element as a scalar,
18738 /// then we can eliminate the shuffle.
18739 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18740                                       SelectionDAG &DAG) {
18741   // First, check if we are taking one element of a vector and shuffling that
18742   // element into another vector.
18743   ArrayRef<int> Mask = Shuf->getMask();
18744   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18745   SDValue Op0 = Shuf->getOperand(0);
18746   SDValue Op1 = Shuf->getOperand(1);
18747   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18748   if (ShufOp0Index == -1) {
18749     // Commute mask and check again.
18750     ShuffleVectorSDNode::commuteMask(CommutedMask);
18751     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18752     if (ShufOp0Index == -1)
18753       return SDValue();
18754     // Commute operands to match the commuted shuffle mask.
18755     std::swap(Op0, Op1);
18756     Mask = CommutedMask;
18757   }
18758
18759   // The shuffle inserts exactly one element from operand 0 into operand 1.
18760   // Now see if we can access that element as a scalar via a real insert element
18761   // instruction.
18762   // TODO: We can try harder to locate the element as a scalar. Examples: it
18763   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18764   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18765          "Shuffle mask value must be from operand 0");
18766   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18767     return SDValue();
18768
18769   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18770   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18771     return SDValue();
18772
18773   // There's an existing insertelement with constant insertion index, so we
18774   // don't need to check the legality/profitability of a replacement operation
18775   // that differs at most in the constant value. The target should be able to
18776   // lower any of those in a similar way. If not, legalization will expand this
18777   // to a scalar-to-vector plus shuffle.
18778   //
18779   // Note that the shuffle may move the scalar from the position that the insert
18780   // element used. Therefore, our new insert element occurs at the shuffle's
18781   // mask index value, not the insert's index value.
18782   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
18783   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18784                                         Op0.getOperand(2).getValueType());
18785   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18786                      Op1, Op0.getOperand(1), NewInsIndex);
18787 }
18788
18789 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18790 /// completely. This has the potential to lose undef knowledge because the first
18791 /// shuffle may not have an undef mask element where the second one does. So
18792 /// only call this after doing simplifications based on demanded elements.
18793 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18794   // shuf (shuf0 X, Y, Mask0), undef, Mask
18795   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18796   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18797     return SDValue();
18798
18799   ArrayRef<int> Mask = Shuf->getMask();
18800   ArrayRef<int> Mask0 = Shuf0->getMask();
18801   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18802     // Ignore undef elements.
18803     if (Mask[i] == -1)
18804       continue;
18805     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18806
18807     // Is the element of the shuffle operand chosen by this shuffle the same as
18808     // the element chosen by the shuffle operand itself?
18809     if (Mask0[Mask[i]] != Mask0[i])
18810       return SDValue();
18811   }
18812   // Every element of this shuffle is identical to the result of the previous
18813   // shuffle, so we can replace this value.
18814   return Shuf->getOperand(0);
18815 }
18816
18817 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // Combine a VECTOR_SHUFFLE node. The folds below are attempted in source
        // order and the first one that produces a value is returned:
        // operand/mask canonicalization, insert-element and splat folds,
        // demanded-elements simplification, extend/truncate-style matching,
        // concat partitioning, scalar-source merging, and merging with a
        // (possibly bitcasted) inner shuffle.
        // Returns the replacement value, SDValue(N, 0) when
        // SimplifyDemandedVectorElts reports a change, or an empty SDValue when
        // nothing applied.
18818   EVT VT = N->getValueType(0);
18819   unsigned NumElts = VT.getVectorNumElements();
18820
18821   SDValue N0 = N->getOperand(0);
18822   SDValue N1 = N->getOperand(1);
18823
18824   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18825
18826   // Canonicalize shuffle undef, undef -> undef
18827   if (N0.isUndef() && N1.isUndef())
18828     return DAG.getUNDEF(VT);
18829
18830   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18831
18832   // Canonicalize shuffle v, v -> v, undef
18833   if (N0 == N1) {
18834     SmallVector<int, 8> NewMask;
18835     for (unsigned i = 0; i != NumElts; ++i) {
18836       int Idx = SVN->getMaskElt(i);
              // Rebase indices that referred to the second operand onto the first.
18837       if (Idx >= (int)NumElts) Idx -= NumElts;
18838       NewMask.push_back(Idx);
18839     }
18840     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18841   }
18842
18843   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18844   if (N0.isUndef())
18845     return DAG.getCommutedVectorShuffle(*SVN);
18846
18847   // Remove references to rhs if it is undef
18848   if (N1.isUndef()) {
18849     bool Changed = false;
18850     SmallVector<int, 8> NewMask;
18851     for (unsigned i = 0; i != NumElts; ++i) {
18852       int Idx = SVN->getMaskElt(i);
18853       if (Idx >= (int)NumElts) {
18854         Idx = -1;
18855         Changed = true;
18856       }
18857       NewMask.push_back(Idx);
18858     }
            // Only build a new node if some mask element was actually rewritten.
18859     if (Changed)
18860       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18861   }
18862
18863   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18864     return InsElt;
18865
18866   // A shuffle of a single vector that is a splatted value can always be folded.
18867   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18868     return V;
18869
18870   // If it is a splat, check if the argument vector is another splat or a
18871   // build_vector.
18872   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18873     int SplatIndex = SVN->getSplatIndex();
18874     if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18875         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18876       // splat (vector_bo L, R), Index -->
18877       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18878       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18879       SDLoc DL(N);
18880       EVT EltVT = VT.getScalarType();
18881       SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18882       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18883       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
18884       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18885                                   N0.getNode()->getFlags());
18886       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
              // The scalar result sits in element 0, so splat element 0.
18887       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18888       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18889     }
18890
18891     // If this is a bit convert that changes the element type of the vector but
18892     // not the number of vector elements, look through it.  Be careful not to
18893     // look through conversions that change things like v4f32 to v2f64.
18894     SDNode *V = N0.getNode();
18895     if (V->getOpcode() == ISD::BITCAST) {
18896       SDValue ConvInput = V->getOperand(0);
18897       if (ConvInput.getValueType().isVector() &&
18898           ConvInput.getValueType().getVectorNumElements() == NumElts)
18899         V = ConvInput.getNode();
18900     }
18901
18902     if (V->getOpcode() == ISD::BUILD_VECTOR) {
18903       assert(V->getNumOperands() == NumElts &&
18904              "BUILD_VECTOR has wrong number of operands");
18905       SDValue Base;
18906       bool AllSame = true;
              // Base becomes the first non-undef operand, if there is one.
18907       for (unsigned i = 0; i != NumElts; ++i) {
18908         if (!V->getOperand(i).isUndef()) {
18909           Base = V->getOperand(i);
18910           break;
18911         }
18912       }
18913       // Splat of <u, u, u, u>, return <u, u, u, u>
18914       if (!Base.getNode())
18915         return N0;
              // Any operand that differs from Base (including an undef one) clears
              // AllSame.
18916       for (unsigned i = 0; i != NumElts; ++i) {
18917         if (V->getOperand(i) != Base) {
18918           AllSame = false;
18919           break;
18920         }
18921       }
18922       // Splat of <x, x, x, x>, return <x, x, x, x>
18923       if (AllSame)
18924         return N0;
18925
18926       // Canonicalize any other splat as a build_vector.
18927       SDValue Splatted = V->getOperand(SplatIndex);
18928       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
18929       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
18930
18931       // We may have jumped through bitcasts, so the type of the
18932       // BUILD_VECTOR may not match the type of the shuffle.
18933       if (V->getValueType(0) != VT)
18934         NewBV = DAG.getBitcast(VT, NewBV);
18935       return NewBV;
18936     }
18937   }
18938
18939   // Simplify source operands based on shuffle mask.
18940   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18941     return SDValue(N, 0);
18942
18943   // This is intentionally placed after demanded elements simplification because
18944   // it could eliminate knowledge of undef elements created by this shuffle.
18945   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
18946     return ShufOp;
18947
18948   // Match shuffles that can be converted to any_vector_extend_in_reg.
18949   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
18950     return V;
18951
18952   // Combine "truncate_vector_in_reg" style shuffles.
18953   if (SDValue V = combineTruncationShuffle(SVN, DAG))
18954     return V;
18955
        // Shuffle of concat_vectors (with undef, or with another concat_vectors
        // whose pieces have the same type): try to partition it per piece.
18956   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
18957       Level < AfterLegalizeVectorOps &&
18958       (N1.isUndef() ||
18959       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
18960        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
18961     if (SDValue V = partitionShuffleOfConcats(N, DAG))
18962       return V;
18963   }
18964
18965   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18966   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18967   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
18968     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
18969       return Res;
18970
18971   // If this shuffle only has a single input that is a bitcasted shuffle,
18972   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
18973   // back to their original types.
18974   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
18975       N1.isUndef() && Level < AfterLegalizeVectorOps &&
18976       TLI.isTypeLegal(VT)) {
            // Rewrite a mask for elements that are Scale times narrower: each
            // element M expands to Scale consecutive elements starting at Scale * M,
            // and an undef element expands to Scale undef elements.
18977     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
18978       if (Scale == 1)
18979         return SmallVector<int, 8>(Mask.begin(), Mask.end());
18980
18981       SmallVector<int, 8> NewMask;
18982       for (int M : Mask)
18983         for (int s = 0; s != Scale; ++s)
18984           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
18985       return NewMask;
18986     };
18987
18988     SDValue BC0 = peekThroughOneUseBitcasts(N0);
18989     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
18990       EVT SVT = VT.getScalarType();
18991       EVT InnerVT = BC0->getValueType(0);
18992       EVT InnerSVT = InnerVT.getScalarType();
18993
18994       // Determine which shuffle works with the smaller scalar type.
18995       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
18996       EVT ScaleSVT = ScaleVT.getScalarType();
18997
              // Both scalar sizes must be whole multiples of the smaller one.
18998       if (TLI.isTypeLegal(ScaleVT) &&
18999           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19000           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19001         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19002         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19003
19004         // Scale the shuffle masks to the smaller scalar type.
19005         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19006         SmallVector<int, 8> InnerMask =
19007             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19008         SmallVector<int, 8> OuterMask =
19009             ScaleShuffleMask(SVN->getMask(), OuterScale);
19010
19011         // Merge the shuffle masks.
19012         SmallVector<int, 8> NewMask;
19013         for (int M : OuterMask)
19014           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19015
19016         // Test for shuffle mask legality over both commutations.
19017         SDValue SV0 = BC0->getOperand(0);
19018         SDValue SV1 = BC0->getOperand(1);
19019         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19020         if (!LegalMask) {
19021           std::swap(SV0, SV1);
19022           ShuffleVectorSDNode::commuteMask(NewMask);
19023           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19024         }
19025
19026         if (LegalMask) {
19027           SV0 = DAG.getBitcast(ScaleVT, SV0);
19028           SV1 = DAG.getBitcast(ScaleVT, SV1);
19029           return DAG.getBitcast(
19030               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19031         }
19032       }
19033     }
19034   }
19035
19036   // Canonicalize shuffles according to rules:
19037   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19038   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19039   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19040   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19041       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19042       TLI.isTypeLegal(VT)) {
19043     // The incoming shuffle must be of the same type as the result of the
19044     // current shuffle.
19045     assert(N1->getOperand(0).getValueType() == VT &&
19046            "Shuffle types don't match");
19047
19048     SDValue SV0 = N1->getOperand(0);
19049     SDValue SV1 = N1->getOperand(1);
19050     bool HasSameOp0 = N0 == SV0;
19051     bool IsSV1Undef = SV1.isUndef();
19052     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19053       // Commute the operands of this shuffle so that next rule
19054       // will trigger.
19055       return DAG.getCommutedVectorShuffle(*SVN);
19056   }
19057
19058   // Try to fold according to rules:
19059   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19060   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19061   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19062   // Don't try to fold shuffles with illegal type.
19063   // Only fold if this shuffle is the only user of the other shuffle.
19064   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19065       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19066     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19067
19068     // Don't try to fold splats; they're likely to simplify somehow, or they
19069     // might be free.
            // NOTE: this returns from the whole visit, so foldShuffleOfConcatUndefs
            // at the end of this function is not attempted for such nodes.
19070     if (OtherSV->isSplat())
19071       return SDValue();
19072
19073     // The incoming shuffle must be of the same type as the result of the
19074     // current shuffle.
19075     assert(OtherSV->getOperand(0).getValueType() == VT &&
19076            "Shuffle types don't match");
19077
19078     SDValue SV0, SV1;
19079     SmallVector<int, 4> Mask;
19080     // Compute the combined shuffle mask for a shuffle with SV0 as the first
19081     // operand, and SV1 as the second operand.
19082     for (unsigned i = 0; i != NumElts; ++i) {
19083       int Idx = SVN->getMaskElt(i);
19084       if (Idx < 0) {
19085         // Propagate Undef.
19086         Mask.push_back(Idx);
19087         continue;
19088       }
19089
19090       SDValue CurrentVec;
19091       if (Idx < (int)NumElts) {
19092         // This shuffle index refers to the inner shuffle N0. Lookup the inner
19093         // shuffle mask to identify which vector is actually referenced.
19094         Idx = OtherSV->getMaskElt(Idx);
19095         if (Idx < 0) {
19096           // Propagate Undef.
19097           Mask.push_back(Idx);
19098           continue;
19099         }
19100
19101         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19102                                            : OtherSV->getOperand(1);
19103       } else {
19104         // This shuffle index references an element within N1.
19105         CurrentVec = N1;
19106       }
19107
19108       // Simple case where 'CurrentVec' is UNDEF.
19109       if (CurrentVec.isUndef()) {
19110         Mask.push_back(-1);
19111         continue;
19112       }
19113
19114       // Canonicalize the shuffle index. We don't know yet if CurrentVec
19115       // will be the first or second operand of the combined shuffle.
19116       Idx = Idx % NumElts;
19117       if (!SV0.getNode() || SV0 == CurrentVec) {
19118         // Ok. CurrentVec is the left hand side.
19119         // Update the mask accordingly.
19120         SV0 = CurrentVec;
19121         Mask.push_back(Idx);
19122         continue;
19123       }
19124
19125       // Bail out if we cannot convert the shuffle pair into a single shuffle.
              // (A third distinct source vector would be required.)
19126       if (SV1.getNode() && SV1 != CurrentVec)
19127         return SDValue();
19128
19129       // Ok. CurrentVec is the right hand side.
19130       // Update the mask accordingly.
19131       SV1 = CurrentVec;
19132       Mask.push_back(Idx + NumElts);
19133     }
19134
19135     // Check if all indices in Mask are Undef. In case, propagate Undef.
19136     bool isUndefMask = true;
19137     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19138       isUndefMask &= Mask[i] < 0;
19139
19140     if (isUndefMask)
19141       return DAG.getUNDEF(VT);
19142
            // Operands that were never assigned above are filled in with undef.
19143     if (!SV0.getNode())
19144       SV0 = DAG.getUNDEF(VT);
19145     if (!SV1.getNode())
19146       SV1 = DAG.getUNDEF(VT);
19147
19148     // Avoid introducing shuffles with illegal mask.
19149     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19150     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19151     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19152     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19153     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19154     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19155     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
19156   }
19157
19158   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19159     return V;
19160
19161   return SDValue();
19162 }
19163
19164 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19165   SDValue InVal = N->getOperand(0);
19166   EVT VT = N->getValueType(0);
19167
19168   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19169   // with a VECTOR_SHUFFLE and possible truncate.
19170   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19171     SDValue InVec = InVal->getOperand(0);
19172     SDValue EltNo = InVal->getOperand(1);
19173     auto InVecT = InVec.getValueType();
19174     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19175       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19176       int Elt = C0->getZExtValue();
19177       NewMask[0] = Elt;
19178       // If we have an implict truncate do truncate here as long as it's legal.
19179       // if it's not legal, this should
19180       if (VT.getScalarType() != InVal.getValueType() &&
19181           InVal.getValueType().isScalarInteger() &&
19182           isTypeLegal(VT.getScalarType())) {
19183         SDValue Val =
19184             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19185         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19186       }
19187       if (VT.getScalarType() == InVecT.getScalarType() &&
19188           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
19189         SDValue LegalShuffle =
19190           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
19191                                       DAG.getUNDEF(InVecT), NewMask, DAG);
19192         if (LegalShuffle) {
19193           // If the initial vector is the correct size this shuffle is a
19194           // valid result.
19195           if (VT == InVecT)
19196             return LegalShuffle;
19197           // If not we must truncate the vector.
19198           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19199             MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
19200             SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
19201             EVT SubVT =
19202                 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
19203                                  VT.getVectorNumElements());
19204             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
19205                                LegalShuffle, ZeroIdx);
19206           }
19207         }
19208       }
19209     }
19210   }
19211
19212   return SDValue();
19213 }
19214
19215 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
        // Combine an INSERT_SUBVECTOR node. N0 is the vector being inserted into,
        // N1 is the subvector being inserted and N2 is the insertion index.
        // The folds are attempted in source order; the first match is returned.
        // Returns an empty SDValue when nothing applied, or SDValue(N, 0) when
        // SimplifyDemandedVectorElts reports a change.
19216   EVT VT = N->getValueType(0);
19217   SDValue N0 = N->getOperand(0);
19218   SDValue N1 = N->getOperand(1);
19219   SDValue N2 = N->getOperand(2);
19220
19221   // If inserting an UNDEF, just return the original vector.
19222   if (N1.isUndef())
19223     return N0;
19224
19225   // If this is an insert of an extracted vector into an undef vector, we can
19226   // just use the input to the extract.
19227   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19228       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19229     return N1.getOperand(0);
19230
19231   // If we are inserting a bitcast of an extracted subvector into an undef,
19232   // and the extract's source has the same number of elements and the same
19233   // total size as the result, just bitcast the source of the extract.
19234   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST (EXTRACT_SUBVECTOR X, N2)) N2 ->
19235   //        BITCAST X
19235   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19236       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19237       N1.getOperand(0).getOperand(1) == N2 &&
19238       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
19239           VT.getVectorNumElements() &&
19240       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
19241           VT.getSizeInBits()) {
19242     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19243   }
19244
19245   // If both N0 and N1 are bitcast values on which insert_subvector
19246   // would make sense, pull the bitcast through.
19247   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19248   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19249   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19250     SDValue CN0 = N0.getOperand(0);
19251     SDValue CN1 = N1.getOperand(0);
19252     EVT CN0VT = CN0.getValueType();
19253     EVT CN1VT = CN1.getValueType();
            // Requires both sources to be vectors with the same element type, and
            // the base source to have as many elements as the result, so N2 can be
            // reused unchanged.
19254     if (CN0VT.isVector() && CN1VT.isVector() &&
19255         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19256         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
19257       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19258                                       CN0.getValueType(), CN0, CN1, N2);
19259       return DAG.getBitcast(VT, NewINSERT);
19260     }
19261   }
19262
19263   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19264   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19265   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19266   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19267       N0.getOperand(1).getValueType() == N1.getValueType() &&
19268       N0.getOperand(2) == N2)
19269     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19270                        N1, N2);
19271
19272   // Eliminate an intermediate insert into an undef vector:
19273   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19274   // insert_subvector undef, X, N2
19275   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19276       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19277     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19278                        N1.getOperand(1), N2);
19279
        // Every fold from here on uses the numeric value of the insertion index.
19280   if (!isa<ConstantSDNode>(N2))
19281     return SDValue();
19282
19283   uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19284
19285   // Push subvector bitcasts to the output, adjusting the index as we go.
19286   // insert_subvector(bitcast(v), bitcast(s), c1)
19287   // -> bitcast(insert_subvector(v, s, c2))
19288   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19289       N1.getOpcode() == ISD::BITCAST) {
19290     SDValue N0Src = peekThroughBitcasts(N0);
19291     SDValue N1Src = peekThroughBitcasts(N1);
19292     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19293     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19294     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19295         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19296       EVT NewVT;
19297       SDLoc DL(N);
19298       SDValue NewIdx;
19299       MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
19300       LLVMContext &Ctx = *DAG.getContext();
19301       unsigned NumElts = VT.getVectorNumElements();
19302       unsigned EltSizeInBits = VT.getScalarSizeInBits();
              // Source elements narrower than (or equal to) the result elements:
              // scale the element count and the index up.
19303       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19304         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19305         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19306         NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
19307       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
                // Source elements wider than the result elements: scale down, which
                // is only possible when both the element count and the index are
                // multiples of Scale.
19308         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19309         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19310           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19311           NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
19312         }
19313       }
              // NewIdx stays null when neither scaling case produced a valid index.
19314       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19315         SDValue Res = DAG.getBitcast(NewVT, N0Src);
19316         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
19317         return DAG.getBitcast(VT, Res);
19318       }
19319     }
19320   }
19321
19322   // Canonicalize insert_subvector dag nodes.
19323   // Example:
19324   // (insert_subvector (insert_subvector A, Idx0), Idx1)
19325   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
        // The swap is done only when Idx1 < Idx0, so the insert with the larger
        // index ends up outermost.
19326   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
19327       N1.getValueType() == N0.getOperand(1).getValueType() &&
19328       isa<ConstantSDNode>(N0.getOperand(2))) {
19329     unsigned OtherIdx = N0.getConstantOperandVal(2);
19330     if (InsIdx < OtherIdx) {
19331       // Swap nodes.
19332       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
19333                                   N0.getOperand(0), N1, N2);
19334       AddToWorklist(NewOp.getNode());
19335       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
19336                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
19337     }
19338   }
19339
19340   // If the input vector is a concatenation, and the insert replaces
19341   // one of the pieces, we can optimize into a single concat_vectors.
19342   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
19343       N0.getOperand(0).getValueType() == N1.getValueType()) {
            // Factor is the element count of one concat piece, so index / Factor
            // selects the piece that N1 replaces.
19344     unsigned Factor = N1.getValueType().getVectorNumElements();
19345
19346     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
19347     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
19348
19349     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19350   }
19351
19352   // Simplify source operands based on insertion.
19353   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19354     return SDValue(N, 0);
19355
19356   return SDValue();
19357 }
19358
19359 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19360   SDValue N0 = N->getOperand(0);
19361
19362   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19363   if (N0->getOpcode() == ISD::FP16_TO_FP)
19364     return N0->getOperand(0);
19365
19366   return SDValue();
19367 }
19368
19369 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19370   SDValue N0 = N->getOperand(0);
19371
19372   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19373   if (N0->getOpcode() == ISD::AND) {
19374     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
19375     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19376       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19377                          N0.getOperand(0));
19378     }
19379   }
19380
19381   return SDValue();
19382 }
19383
19384 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19385   SDValue N0 = N->getOperand(0);
19386   EVT VT = N0.getValueType();
19387   unsigned Opcode = N->getOpcode();
19388
19389   // VECREDUCE over 1-element vector is just an extract.
19390   if (VT.getVectorNumElements() == 1) {
19391     SDLoc dl(N);
19392     SDValue Res = DAG.getNode(
19393         ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
19394         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
19395     if (Res.getValueType() != N->getValueType(0))
19396       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19397     return Res;
19398   }
19399
19400   // On an boolean vector an and/or reduction is the same as a umin/umax
19401   // reduction. Convert them if the latter is legal while the former isn't.
19402   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19403     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19404         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
19405     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19406         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19407         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
19408       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19409   }
19410
19411   return SDValue();
19412 }
19413
19414 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
19415 /// with the destination vector and a zero vector.
19416 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
19417 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
19418 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
19419   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
19420
19421   EVT VT = N->getValueType(0);
19422   SDValue LHS = N->getOperand(0);
19423   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
19424   SDLoc DL(N);
19425
19426   // Make sure we're not running after operation legalization where it
19427   // may have custom lowered the vector shuffles.
19428   if (LegalOperations)
19429     return SDValue();
19430
19431   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19432     return SDValue();
19433
19434   EVT RVT = RHS.getValueType();
19435   unsigned NumElts = RHS.getNumOperands();
19436
19437   // Attempt to create a valid clear mask, splitting the mask into
19438   // sub elements and checking to see if each is
19439   // all zeros or all ones - suitable for shuffle masking.
19440   auto BuildClearMask = [&](int Split) {
19441     int NumSubElts = NumElts * Split;
19442     int NumSubBits = RVT.getScalarSizeInBits() / Split;
19443
19444     SmallVector<int, 8> Indices;
19445     for (int i = 0; i != NumSubElts; ++i) {
19446       int EltIdx = i / Split;
19447       int SubIdx = i % Split;
19448       SDValue Elt = RHS.getOperand(EltIdx);
19449       if (Elt.isUndef()) {
19450         Indices.push_back(-1);
19451         continue;
19452       }
19453
19454       APInt Bits;
19455       if (isa<ConstantSDNode>(Elt))
19456         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
19457       else if (isa<ConstantFPSDNode>(Elt))
19458         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
19459       else
19460         return SDValue();
19461
19462       // Extract the sub element from the constant bit mask.
19463       if (DAG.getDataLayout().isBigEndian()) {
19464         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
19465       } else {
19466         Bits.lshrInPlace(SubIdx * NumSubBits);
19467       }
19468
19469       if (Split > 1)
19470         Bits = Bits.trunc(NumSubBits);
19471
19472       if (Bits.isAllOnesValue())
19473         Indices.push_back(i);
19474       else if (Bits == 0)
19475         Indices.push_back(i + NumSubElts);
19476       else
19477         return SDValue();
19478     }
19479
19480     // Let's see if the target supports this vector_shuffle.
19481     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
19482     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
19483     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
19484       return SDValue();
19485
19486     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
19487     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
19488                                                    DAG.getBitcast(ClearVT, LHS),
19489                                                    Zero, Indices));
19490   };
19491
19492   // Determine maximum split level (byte level masking).
19493   int MaxSplit = 1;
19494   if (RVT.getScalarSizeInBits() % 8 == 0)
19495     MaxSplit = RVT.getScalarSizeInBits() / 8;
19496
19497   for (int Split = 1; Split <= MaxSplit; ++Split)
19498     if (RVT.getScalarSizeInBits() % Split == 0)
19499       if (SDValue S = BuildClearMask(Split))
19500         return S;
19501
19502   return SDValue();
19503 }
19504
19505 /// If a vector binop is performed on splat values, it may be profitable to
19506 /// extract, scalarize, and insert/splat.
19507 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
19508   SDValue N0 = N->getOperand(0);
19509   SDValue N1 = N->getOperand(1);
19510   unsigned Opcode = N->getOpcode();
19511   EVT VT = N->getValueType(0);
19512   EVT EltVT = VT.getVectorElementType();
19513   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19514
19515   // TODO: Remove/replace the extract cost check? If the elements are available
19516   //       as scalars, then there may be no extract cost. Should we ask if
19517   //       inserting a scalar back into a vector is cheap instead?
19518   int Index0, Index1;
19519   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19520   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19521   if (!Src0 || !Src1 || Index0 != Index1 ||
19522       Src0.getValueType().getVectorElementType() != EltVT ||
19523       Src1.getValueType().getVectorElementType() != EltVT ||
19524       !TLI.isExtractVecEltCheap(VT, Index0) ||
19525       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19526     return SDValue();
19527
19528   SDLoc DL(N);
19529   SDValue IndexC =
19530       DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19531   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19532   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19533   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19534
19535   // If all lanes but 1 are undefined, no need to splat the scalar result.
19536   // TODO: Keep track of undefs and use that info in the general case.
19537   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19538       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19539       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19540     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19541     // build_vec ..undef, (bo X, Y), undef...
19542     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
19543     Ops[Index0] = ScalarBO;
19544     return DAG.getBuildVector(VT, DL, Ops);
19545   }
19546
19547   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
19548   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
19549   return DAG.getBuildVector(VT, DL, Ops);
19550 }
19551
19552 /// Visit a binary vector operation, like ADD.
19553 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19554   assert(N->getValueType(0).isVector() &&
19555          "SimplifyVBinOp only works on vectors!");
19556
19557   SDValue LHS = N->getOperand(0);
19558   SDValue RHS = N->getOperand(1);
19559   SDValue Ops[] = {LHS, RHS};
19560   EVT VT = N->getValueType(0);
19561   unsigned Opcode = N->getOpcode();
19562
19563   // See if we can constant fold the vector operation.
19564   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19565           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19566     return Fold;
19567
19568   // Move unary shuffles with identical masks after a vector binop:
19569   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19570   //   --> shuffle (VBinOp A, B), Undef, Mask
19571   // This does not require type legality checks because we are creating the
19572   // same types of operations that are in the original sequence. We do have to
19573   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19574   // though. This code is adapted from the identical transform in instcombine.
19575   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19576       Opcode != ISD::UREM && Opcode != ISD::SREM &&
19577       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19578     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19579     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19580     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19581         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19582         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19583       SDLoc DL(N);
19584       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19585                                      RHS.getOperand(0), N->getFlags());
19586       SDValue UndefV = LHS.getOperand(1);
19587       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19588     }
19589   }
19590
19591   // The following pattern is likely to emerge with vector reduction ops. Moving
19592   // the binary operation ahead of insertion may allow using a narrower vector
19593   // instruction that has better performance than the wide version of the op:
19594   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19595   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19596       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19597       LHS.getOperand(2) == RHS.getOperand(2) &&
19598       (LHS.hasOneUse() || RHS.hasOneUse())) {
19599     SDValue X = LHS.getOperand(1);
19600     SDValue Y = RHS.getOperand(1);
19601     SDValue Z = LHS.getOperand(2);
19602     EVT NarrowVT = X.getValueType();
19603     if (NarrowVT == Y.getValueType() &&
19604         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19605       // (binop undef, undef) may not return undef, so compute that result.
19606       SDLoc DL(N);
19607       SDValue VecC =
19608           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19609       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19610       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19611     }
19612   }
19613
19614   // Make sure all but the first op are undef or constant.
19615   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
19616     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
19617            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
19618                      [](const SDValue &Op) {
19619                        return Op.isUndef() ||
19620                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
19621                      });
19622   };
19623
19624   // The following pattern is likely to emerge with vector reduction ops. Moving
19625   // the binary operation ahead of the concat may allow using a narrower vector
19626   // instruction that has better performance than the wide version of the op:
19627   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
19628   //   concat (VBinOp X, Y), VecC
19629   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
19630       (LHS.hasOneUse() || RHS.hasOneUse())) {
19631     EVT NarrowVT = LHS.getOperand(0).getValueType();
19632     if (NarrowVT == RHS.getOperand(0).getValueType() &&
19633         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19634       SDLoc DL(N);
19635       unsigned NumOperands = LHS.getNumOperands();
19636       SmallVector<SDValue, 4> ConcatOps;
19637       for (unsigned i = 0; i != NumOperands; ++i) {
19638         // This constant fold for operands 1 and up.
19639         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
19640                                         RHS.getOperand(i)));
19641       }
19642
19643       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19644     }
19645   }
19646
19647   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19648     return V;
19649
19650   return SDValue();
19651 }
19652
19653 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19654                                     SDValue N2) {
19655   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
19656
19657   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19658                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
19659
19660   // If we got a simplified select_cc node back from SimplifySelectCC, then
19661   // break it down into a new SETCC node, and a new SELECT node, and then return
19662   // the SELECT node, since we were called with a SELECT node.
19663   if (SCC.getNode()) {
19664     // Check to see if we got a select_cc back (to turn into setcc/select).
19665     // Otherwise, just return whatever node we got back, like fabs.
19666     if (SCC.getOpcode() == ISD::SELECT_CC) {
19667       const SDNodeFlags Flags = N0.getNode()->getFlags();
19668       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
19669                                   N0.getValueType(),
19670                                   SCC.getOperand(0), SCC.getOperand(1),
19671                                   SCC.getOperand(4), Flags);
19672       AddToWorklist(SETCC.getNode());
19673       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19674                                          SCC.getOperand(2), SCC.getOperand(3));
19675       SelectNode->setFlags(Flags);
19676       return SelectNode;
19677     }
19678
19679     return SCC;
19680   }
19681   return SDValue();
19682 }
19683
19684 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19685 /// being selected between, see if we can simplify the select.  Callers of this
19686 /// should assume that TheSelect is deleted if this returns true.  As such, they
19687 /// should return the appropriate thing (e.g. the node) back to the top-level of
19688 /// the DAG combiner loop to avoid it being looked at.
19689 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19690                                     SDValue RHS) {
19691   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19692   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19693   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19694     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19695       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19696       SDValue Sqrt = RHS;
19697       ISD::CondCode CC;
19698       SDValue CmpLHS;
19699       const ConstantFPSDNode *Zero = nullptr;
19700
19701       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19702         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19703         CmpLHS = TheSelect->getOperand(0);
19704         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19705       } else {
19706         // SELECT or VSELECT
19707         SDValue Cmp = TheSelect->getOperand(0);
19708         if (Cmp.getOpcode() == ISD::SETCC) {
19709           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19710           CmpLHS = Cmp.getOperand(0);
19711           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19712         }
19713       }
19714       if (Zero && Zero->isZero() &&
19715           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19716           CC == ISD::SETULT || CC == ISD::SETLT)) {
19717         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19718         CombineTo(TheSelect, Sqrt);
19719         return true;
19720       }
19721     }
19722   }
19723   // Cannot simplify select with vector condition
19724   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19725
19726   // If this is a select from two identical things, try to pull the operation
19727   // through the select.
19728   if (LHS.getOpcode() != RHS.getOpcode() ||
19729       !LHS.hasOneUse() || !RHS.hasOneUse())
19730     return false;
19731
19732   // If this is a load and the token chain is identical, replace the select
19733   // of two loads with a load through a select of the address to load from.
19734   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19735   // constants have been dropped into the constant pool.
19736   if (LHS.getOpcode() == ISD::LOAD) {
19737     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19738     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19739
19740     // Token chains must be identical.
19741     if (LHS.getOperand(0) != RHS.getOperand(0) ||
19742         // Do not let this transformation reduce the number of volatile loads.
19743         // Be conservative for atomics for the moment
19744         // TODO: This does appear to be legal for unordered atomics (see D66309)
19745         !LLD->isSimple() || !RLD->isSimple() ||
19746         // FIXME: If either is a pre/post inc/dec load,
19747         // we'd need to split out the address adjustment.
19748         LLD->isIndexed() || RLD->isIndexed() ||
19749         // If this is an EXTLOAD, the VT's must match.
19750         LLD->getMemoryVT() != RLD->getMemoryVT() ||
19751         // If this is an EXTLOAD, the kind of extension must match.
19752         (LLD->getExtensionType() != RLD->getExtensionType() &&
19753          // The only exception is if one of the extensions is anyext.
19754          LLD->getExtensionType() != ISD::EXTLOAD &&
19755          RLD->getExtensionType() != ISD::EXTLOAD) ||
19756         // FIXME: this discards src value information.  This is
19757         // over-conservative. It would be beneficial to be able to remember
19758         // both potential memory locations.  Since we are discarding
19759         // src value info, don't do the transformation if the memory
19760         // locations are not in the default address space.
19761         LLD->getPointerInfo().getAddrSpace() != 0 ||
19762         RLD->getPointerInfo().getAddrSpace() != 0 ||
19763         // We can't produce a CMOV of a TargetFrameIndex since we won't
19764         // generate the address generation required.
19765         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19766         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19767         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19768                                       LLD->getBasePtr().getValueType()))
19769       return false;
19770
19771     // The loads must not depend on one another.
19772     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19773       return false;
19774
19775     // Check that the select condition doesn't reach either load.  If so,
19776     // folding this will induce a cycle into the DAG.  If not, this is safe to
19777     // xform, so create a select of the addresses.
19778
19779     SmallPtrSet<const SDNode *, 32> Visited;
19780     SmallVector<const SDNode *, 16> Worklist;
19781
19782     // Always fail if LLD and RLD are not independent. TheSelect is a
19783     // predecessor to all Nodes in question so we need not search past it.
19784
19785     Visited.insert(TheSelect);
19786     Worklist.push_back(LLD);
19787     Worklist.push_back(RLD);
19788
19789     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19790         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19791       return false;
19792
19793     SDValue Addr;
19794     if (TheSelect->getOpcode() == ISD::SELECT) {
19795       // We cannot do this optimization if any pair of {RLD, LLD} is a
19796       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19797       // Loads, we only need to check if CondNode is a successor to one of the
19798       // loads. We can further avoid this if there's no use of their chain
19799       // value.
19800       SDNode *CondNode = TheSelect->getOperand(0).getNode();
19801       Worklist.push_back(CondNode);
19802
19803       if ((LLD->hasAnyUseOfValue(1) &&
19804            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19805           (RLD->hasAnyUseOfValue(1) &&
19806            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19807         return false;
19808
19809       Addr = DAG.getSelect(SDLoc(TheSelect),
19810                            LLD->getBasePtr().getValueType(),
19811                            TheSelect->getOperand(0), LLD->getBasePtr(),
19812                            RLD->getBasePtr());
19813     } else {  // Otherwise SELECT_CC
19814       // We cannot do this optimization if any pair of {RLD, LLD} is a
19815       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19816       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19817       // one of the loads. We can further avoid this if there's no use of their
19818       // chain value.
19819
19820       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19821       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19822       Worklist.push_back(CondLHS);
19823       Worklist.push_back(CondRHS);
19824
19825       if ((LLD->hasAnyUseOfValue(1) &&
19826            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19827           (RLD->hasAnyUseOfValue(1) &&
19828            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19829         return false;
19830
19831       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19832                          LLD->getBasePtr().getValueType(),
19833                          TheSelect->getOperand(0),
19834                          TheSelect->getOperand(1),
19835                          LLD->getBasePtr(), RLD->getBasePtr(),
19836                          TheSelect->getOperand(4));
19837     }
19838
19839     SDValue Load;
19840     // It is safe to replace the two loads if they have different alignments,
19841     // but the new load must be the minimum (most restrictive) alignment of the
19842     // inputs.
19843     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19844     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19845     if (!RLD->isInvariant())
19846       MMOFlags &= ~MachineMemOperand::MOInvariant;
19847     if (!RLD->isDereferenceable())
19848       MMOFlags &= ~MachineMemOperand::MODereferenceable;
19849     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19850       // FIXME: Discards pointer and AA info.
19851       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19852                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19853                          MMOFlags);
19854     } else {
19855       // FIXME: Discards pointer and AA info.
19856       Load = DAG.getExtLoad(
19857           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19858                                                   : LLD->getExtensionType(),
19859           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19860           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19861     }
19862
19863     // Users of the select now use the result of the load.
19864     CombineTo(TheSelect, Load);
19865
19866     // Users of the old loads now use the new load's chain.  We know the
19867     // old-load value is dead now.
19868     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19869     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19870     return true;
19871   }
19872
19873   return false;
19874 }
19875
19876 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19877 /// bitwise 'and'.
19878 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19879                                             SDValue N1, SDValue N2, SDValue N3,
19880                                             ISD::CondCode CC) {
19881   // If this is a select where the false operand is zero and the compare is a
19882   // check of the sign bit, see if we can perform the "gzip trick":
19883   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19884   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19885   EVT XType = N0.getValueType();
19886   EVT AType = N2.getValueType();
19887   if (!isNullConstant(N3) || !XType.bitsGE(AType))
19888     return SDValue();
19889
19890   // If the comparison is testing for a positive value, we have to invert
19891   // the sign bit mask, so only do that transform if the target has a bitwise
19892   // 'and not' instruction (the invert is free).
19893   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19894     // (X > -1) ? A : 0
19895     // (X >  0) ? X : 0 <-- This is canonical signed max.
19896     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19897       return SDValue();
19898   } else if (CC == ISD::SETLT) {
19899     // (X <  0) ? A : 0
19900     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
19901     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
19902       return SDValue();
19903   } else {
19904     return SDValue();
19905   }
19906
19907   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
19908   // constant.
19909   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
19910   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
19911   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
19912     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
19913     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
19914     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
19915     AddToWorklist(Shift.getNode());
19916
19917     if (XType.bitsGT(AType)) {
19918       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19919       AddToWorklist(Shift.getNode());
19920     }
19921
19922     if (CC == ISD::SETGT)
19923       Shift = DAG.getNOT(DL, Shift, AType);
19924
19925     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19926   }
19927
19928   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
19929   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
19930   AddToWorklist(Shift.getNode());
19931
19932   if (XType.bitsGT(AType)) {
19933     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
19934     AddToWorklist(Shift.getNode());
19935   }
19936
19937   if (CC == ISD::SETGT)
19938     Shift = DAG.getNOT(DL, Shift, AType);
19939
19940   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
19941 }
19942
19943 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
19944 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
19945 /// in it. This may be a win when the constant is not otherwise available
19946 /// because it replaces two constant pool loads with one.
19947 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
19948     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
19949     ISD::CondCode CC) {
19950   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
19951     return SDValue();
19952
19953   // If we are before legalize types, we want the other legalization to happen
19954   // first (for example, to avoid messing with soft float).
19955   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
19956   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
19957   EVT VT = N2.getValueType();
19958   if (!TV || !FV || !TLI.isTypeLegal(VT))
19959     return SDValue();
19960
19961   // If a constant can be materialized without loads, this does not make sense.
19962   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
19963       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
19964       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
19965     return SDValue();
19966
19967   // If both constants have multiple uses, then we won't need to do an extra
19968   // load. The values are likely around in registers for other users.
19969   if (!TV->hasOneUse() && !FV->hasOneUse())
19970     return SDValue();
19971
19972   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
19973                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
19974   Type *FPTy = Elts[0]->getType();
19975   const DataLayout &TD = DAG.getDataLayout();
19976
19977   // Create a ConstantArray of the two constants.
19978   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
19979   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
19980                                       TD.getPrefTypeAlignment(FPTy));
19981   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
19982
19983   // Get offsets to the 0 and 1 elements of the array, so we can select between
19984   // them.
19985   SDValue Zero = DAG.getIntPtrConstant(0, DL);
19986   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
19987   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
19988   SDValue Cond =
19989       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
19990   AddToWorklist(Cond.getNode());
19991   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
19992   AddToWorklist(CstOffset.getNode());
19993   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
19994   AddToWorklist(CPIdx.getNode());
19995   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
19996                      MachinePointerInfo::getConstantPool(
19997                          DAG.getMachineFunction()), Alignment);
19998 }
19999
20000 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
20001 /// where 'cond' is the comparison specified by CC.
20002 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
20003                                       SDValue N2, SDValue N3, ISD::CondCode CC,
20004                                       bool NotExtCompare) {
20005   // (x ? y : y) -> y.
20006   if (N2 == N3) return N2;
20007
20008   EVT CmpOpVT = N0.getValueType();
20009   EVT CmpResVT = getSetCCResultType(CmpOpVT);
20010   EVT VT = N2.getValueType();
20011   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
20012   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20013   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
20014
20015   // Determine if the condition we're dealing with is constant.
20016   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
20017     AddToWorklist(SCC.getNode());
20018     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
20019       // fold select_cc true, x, y -> x
20020       // fold select_cc false, x, y -> y
20021       return !(SCCC->isNullValue()) ? N2 : N3;
20022     }
20023   }
20024
20025   if (SDValue V =
20026           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20027     return V;
20028
20029   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20030     return V;
20031
20032   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20033   // where y is has a single bit set.
20034   // A plaintext description would be, we can turn the SELECT_CC into an AND
20035   // when the condition can be materialized as an all-ones register.  Any
20036   // single bit-test can be materialized as an all-ones register with
20037   // shift-left and shift-right-arith.
20038   // TODO: The operation legality checks could be loosened to include "custom",
20039   //       but that may cause regressions for targets that do not have shift
20040   //       instructions.
20041   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
20042       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
20043       TLI.isOperationLegal(ISD::SHL, VT) &&
20044       TLI.isOperationLegal(ISD::SRA, VT)) {
20045     SDValue AndLHS = N0->getOperand(0);
20046     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20047     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
20048       // Shift the tested bit over the sign bit.
20049       const APInt &AndMask = ConstAndRHS->getAPIntValue();
20050       SDValue ShlAmt =
20051         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20052                         getShiftAmountTy(AndLHS.getValueType()));
20053       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20054
20055       // Now arithmetic right shift it all the way over, so the result is either
20056       // all-ones, or zero.
20057       SDValue ShrAmt =
20058         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
20059                         getShiftAmountTy(Shl.getValueType()));
20060       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20061
20062       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20063     }
20064   }
20065
20066   // fold select C, 16, 0 -> shl C, 4
20067   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
20068   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
20069
20070   if ((Fold || Swap) &&
20071       TLI.getBooleanContents(CmpOpVT) ==
20072           TargetLowering::ZeroOrOneBooleanContent &&
20073       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
20074
20075     if (Swap) {
20076       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
20077       std::swap(N2C, N3C);
20078     }
20079
20080     // If the caller doesn't want us to simplify this into a zext of a compare,
20081     // don't do it.
20082     if (NotExtCompare && N2C->isOne())
20083       return SDValue();
20084
20085     SDValue Temp, SCC;
20086     // zext (setcc n0, n1)
20087     if (LegalTypes) {
20088       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20089       if (VT.bitsLT(SCC.getValueType()))
20090         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20091       else
20092         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20093     } else {
20094       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20095       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20096     }
20097
20098     AddToWorklist(SCC.getNode());
20099     AddToWorklist(Temp.getNode());
20100
20101     if (N2C->isOne())
20102       return Temp;
20103
20104     // shl setcc result by log2 n2c
20105     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20106                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
20107                                        SDLoc(Temp),
20108                                        getShiftAmountTy(Temp.getValueType())));
20109   }
20110
20111   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20112   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20113   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20114   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20115   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20116   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20117   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20118   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20119   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
20120     SDValue ValueOnZero = N2;
20121     SDValue Count = N3;
20122     // If the condition is NE instead of E, swap the operands.
20123     if (CC == ISD::SETNE)
20124       std::swap(ValueOnZero, Count);
20125     // Check if the value on zero is a constant equal to the bits in the type.
20126     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20127       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20128         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20129         // legal, combine to just cttz.
20130         if ((Count.getOpcode() == ISD::CTTZ ||
20131              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20132             N0 == Count.getOperand(0) &&
20133             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20134           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20135         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20136         // legal, combine to just ctlz.
20137         if ((Count.getOpcode() == ISD::CTLZ ||
20138              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20139             N0 == Count.getOperand(0) &&
20140             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20141           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20142       }
20143     }
20144   }
20145
20146   return SDValue();
20147 }
20148
20149 /// This is a stub for TargetLowering::SimplifySetCC.
20150 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20151                                    ISD::CondCode Cond, const SDLoc &DL,
20152                                    bool foldBooleans) {
20153   TargetLowering::DAGCombinerInfo
20154     DagCombineInfo(DAG, Level, false, this);
20155   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20156 }
20157
20158 /// Given an ISD::SDIV node expressing a divide by constant, return
20159 /// a DAG expression to select that will generate the same value by multiplying
20160 /// by a magic number.
20161 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20162 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20163   // when optimising for minimum size, we don't want to expand a div to a mul
20164   // and a shift.
20165   if (DAG.getMachineFunction().getFunction().hasMinSize())
20166     return SDValue();
20167
20168   SmallVector<SDNode *, 8> Built;
20169   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20170     for (SDNode *N : Built)
20171       AddToWorklist(N);
20172     return S;
20173   }
20174
20175   return SDValue();
20176 }
20177
20178 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20179 /// DAG expression that will generate the same value by right shifting.
20180 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20181   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20182   if (!C)
20183     return SDValue();
20184
20185   // Avoid division by zero.
20186   if (C->isNullValue())
20187     return SDValue();
20188
20189   SmallVector<SDNode *, 8> Built;
20190   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20191     for (SDNode *N : Built)
20192       AddToWorklist(N);
20193     return S;
20194   }
20195
20196   return SDValue();
20197 }
20198
20199 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20200 /// expression that will generate the same value by multiplying by a magic
20201 /// number.
20202 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20203 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20204   // when optimising for minimum size, we don't want to expand a div to a mul
20205   // and a shift.
20206   if (DAG.getMachineFunction().getFunction().hasMinSize())
20207     return SDValue();
20208
20209   SmallVector<SDNode *, 8> Built;
20210   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20211     for (SDNode *N : Built)
20212       AddToWorklist(N);
20213     return S;
20214   }
20215
20216   return SDValue();
20217 }
20218
20219 /// Determines the LogBase2 value for a non-null input value using the
20220 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20221 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20222   EVT VT = V.getValueType();
20223   unsigned EltBits = VT.getScalarSizeInBits();
20224   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20225   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20226   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20227   return LogBase2;
20228 }
20229
20230 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20231 /// For the reciprocal, we need to find the zero of the function:
20232 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
20233 ///     =>
20234 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20235 ///     does not require additional intermediate precision]
20236 /// For the last iteration, put numerator N into it to gain more precision:
20237 ///   Result = N X_i + X_i (N - N A X_i)
20238 SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
20239                                       SDNodeFlags Flags) {
20240   if (Level >= AfterLegalizeDAG)
20241     return SDValue();
20242
20243   // TODO: Handle half and/or extended types?
20244   EVT VT = Op.getValueType();
20245   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20246     return SDValue();
20247
20248   // If estimates are explicitly disabled for this function, we're done.
20249   MachineFunction &MF = DAG.getMachineFunction();
20250   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20251   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20252     return SDValue();
20253
20254   // Estimates may be explicitly enabled for this type with a custom number of
20255   // refinement steps.
20256   int Iterations = TLI.getDivRefinementSteps(VT, MF);
20257   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20258     AddToWorklist(Est.getNode());
20259
20260     SDLoc DL(Op);
20261     if (Iterations) {
20262       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20263
20264       // Newton iterations: Est = Est + Est (N - Arg * Est)
20265       // If this is the last iteration, also multiply by the numerator.
20266       for (int i = 0; i < Iterations; ++i) {
20267         SDValue MulEst = Est;
20268
20269         if (i == Iterations - 1) {
20270           MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
20271           AddToWorklist(MulEst.getNode());
20272         }
20273
20274         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
20275         AddToWorklist(NewEst.getNode());
20276
20277         NewEst = DAG.getNode(ISD::FSUB, DL, VT,
20278                              (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
20279         AddToWorklist(NewEst.getNode());
20280
20281         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20282         AddToWorklist(NewEst.getNode());
20283
20284         Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
20285         AddToWorklist(Est.getNode());
20286       }
20287     } else {
20288       // If no iterations are available, multiply with N.
20289       Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
20290       AddToWorklist(Est.getNode());
20291     }
20292
20293     return Est;
20294   }
20295
20296   return SDValue();
20297 }
20298
20299 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20300 /// For the reciprocal sqrt, we need to find the zero of the function:
20301 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20302 ///     =>
20303 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20304 /// As a result, we precompute A/2 prior to the iteration loop.
20305 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20306                                          unsigned Iterations,
20307                                          SDNodeFlags Flags, bool Reciprocal) {
20308   EVT VT = Arg.getValueType();
20309   SDLoc DL(Arg);
20310   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20311
20312   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20313   // this entire sequence requires only one FP constant.
20314   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20315   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20316
20317   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20318   for (unsigned i = 0; i < Iterations; ++i) {
20319     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20320     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20321     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20322     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20323   }
20324
20325   // If non-reciprocal square root is requested, multiply the result by Arg.
20326   if (!Reciprocal)
20327     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20328
20329   return Est;
20330 }
20331
20332 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20333 /// For the reciprocal sqrt, we need to find the zero of the function:
20334 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20335 ///     =>
20336 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
20337 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
20338                                          unsigned Iterations,
20339                                          SDNodeFlags Flags, bool Reciprocal) {
20340   EVT VT = Arg.getValueType();
20341   SDLoc DL(Arg);
20342   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
20343   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
20344
20345   // This routine must enter the loop below to work correctly
20346   // when (Reciprocal == false).
20347   assert(Iterations > 0);
20348
20349   // Newton iterations for reciprocal square root:
20350   // E = (E * -0.5) * ((A * E) * E + -3.0)
20351   for (unsigned i = 0; i < Iterations; ++i) {
20352     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20353     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20354     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20355
20356     // When calculating a square root at the last iteration build:
20357     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
20358     // (notice a common subexpression)
20359     SDValue LHS;
20360     if (Reciprocal || (i + 1) < Iterations) {
20361       // RSQRT: LHS = (E * -0.5)
20362       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
20363     } else {
20364       // SQRT: LHS = (A * E) * -0.5
20365       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
20366     }
20367
20368     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20369   }
20370
20371   return Est;
20372 }
20373
20374 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
20375 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
20376 /// Op can be zero.
20377 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
20378                                            bool Reciprocal) {
20379   if (Level >= AfterLegalizeDAG)
20380     return SDValue();
20381
20382   // TODO: Handle half and/or extended types?
20383   EVT VT = Op.getValueType();
20384   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20385     return SDValue();
20386
20387   // If estimates are explicitly disabled for this function, we're done.
20388   MachineFunction &MF = DAG.getMachineFunction();
20389   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
20390   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20391     return SDValue();
20392
20393   // Estimates may be explicitly enabled for this type with a custom number of
20394   // refinement steps.
20395   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
20396
20397   bool UseOneConstNR = false;
20398   if (SDValue Est =
20399       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
20400                           Reciprocal)) {
20401     AddToWorklist(Est.getNode());
20402
20403     if (Iterations) {
20404       Est = UseOneConstNR
20405             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
20406             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
20407
20408       if (!Reciprocal) {
20409         // The estimate is now completely wrong if the input was exactly 0.0 or
20410         // possibly a denormal. Force the answer to 0.0 for those cases.
20411         SDLoc DL(Op);
20412         EVT CCVT = getSetCCResultType(VT);
20413         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
20414         const Function &F = DAG.getMachineFunction().getFunction();
20415         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
20416         if (Denorms.getValueAsString().equals("ieee")) {
20417           // fabs(X) < SmallestNormal ? 0.0 : Est
20418           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
20419           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
20420           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
20421           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20422           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
20423           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
20424           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20425         } else {
20426           // X == 0.0 ? 0.0 : Est
20427           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20428           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
20429           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20430         }
20431       }
20432     }
20433     return Est;
20434   }
20435
20436   return SDValue();
20437 }
20438
20439 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20440   return buildSqrtEstimateImpl(Op, Flags, true);
20441 }
20442
20443 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20444   return buildSqrtEstimateImpl(Op, Flags, false);
20445 }
20446
20447 /// Return true if there is any possibility that the two addresses overlap.
20448 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
20449
20450   struct MemUseCharacteristics {
20451     bool IsVolatile;
20452     bool IsAtomic;
20453     SDValue BasePtr;
20454     int64_t Offset;
20455     Optional<int64_t> NumBytes;
20456     MachineMemOperand *MMO;
20457   };
20458
20459   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
20460     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
20461       int64_t Offset = 0;
20462       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
20463         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
20464                      ? C->getSExtValue()
20465                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
20466                            ? -1 * C->getSExtValue()
20467                            : 0;
20468       return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
20469               Offset /*base offset*/,
20470               Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
20471               LSN->getMemOperand()};
20472     }
20473     if (const auto *LN = cast<LifetimeSDNode>(N))
20474       return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
20475               (LN->hasOffset()) ? LN->getOffset() : 0,
20476               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
20477                                 : Optional<int64_t>(),
20478               (MachineMemOperand *)nullptr};
20479     // Default.
20480     return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
20481             (int64_t)0 /*offset*/,
20482             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
20483   };
20484
20485   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
20486                         MUC1 = getCharacteristics(Op1);
20487
20488   // If they are to the same address, then they must be aliases.
20489   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
20490       MUC0.Offset == MUC1.Offset)
20491     return true;
20492
20493   // If they are both volatile then they cannot be reordered.
20494   if (MUC0.IsVolatile && MUC1.IsVolatile)
20495     return true;
20496
20497   // Be conservative about atomics for the moment
20498   // TODO: This is way overconservative for unordered atomics (see D66309)
20499   if (MUC0.IsAtomic && MUC1.IsAtomic)
20500     return true;
20501
20502   if (MUC0.MMO && MUC1.MMO) {
20503     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20504         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20505       return false;
20506   }
20507
20508   // Try to prove that there is aliasing, or that there is no aliasing. Either
20509   // way, we can return now. If nothing can be proved, proceed with more tests.
20510   bool IsAlias;
20511   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
20512                                        DAG, IsAlias))
20513     return IsAlias;
20514
20515   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
20516   // either are not known.
20517   if (!MUC0.MMO || !MUC1.MMO)
20518     return true;
20519
20520   // If one operation reads from invariant memory, and the other may store, they
20521   // cannot alias. These should really be checking the equivalent of mayWrite,
20522   // but it only matters for memory nodes other than load /store.
20523   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20524       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20525     return false;
20526
20527   // If we know required SrcValue1 and SrcValue2 have relatively large
20528   // alignment compared to the size and offset of the access, we may be able
20529   // to prove they do not alias. This check is conservative for now to catch
20530   // cases created by splitting vector types.
20531   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
20532   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
20533   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
20534   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
20535   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
20536       MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
20537       *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
20538     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
20539     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
20540
20541     // There is no overlap between these relatively aligned accesses of
20542     // similar size. Return no alias.
20543     if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
20544         (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
20545       return false;
20546   }
20547
20548   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20549                    ? CombinerGlobalAA
20550                    : DAG.getSubtarget().useAA();
20551 #ifndef NDEBUG
20552   if (CombinerAAOnlyFunc.getNumOccurrences() &&
20553       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
20554     UseAA = false;
20555 #endif
20556
20557   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20558     // Use alias analysis information.
20559     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20560     int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20561     int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20562     AliasResult AAResult = AA->alias(
20563         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20564                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20565         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20566                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20567     if (AAResult == NoAlias)
20568       return false;
20569   }
20570
20571   // Otherwise we have to assume they alias.
20572   return true;
20573 }
20574
20575 /// Walk up chain skipping non-aliasing memory nodes,
20576 /// looking for aliasing nodes and adding them to the Aliases vector.
20577 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20578                                    SmallVectorImpl<SDValue> &Aliases) {
20579   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
20580   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
20581
20582   // Get alias information for node.
20583   // TODO: relax aliasing for unordered atomics (see D66309)
20584   const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
20585
20586   // Starting off.
20587   Chains.push_back(OriginalChain);
20588   unsigned Depth = 0;
20589
20590   // Attempt to improve chain by a single step
20591   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20592     switch (C.getOpcode()) {
20593     case ISD::EntryToken:
20594       // No need to mark EntryToken.
20595       C = SDValue();
20596       return true;
20597     case ISD::LOAD:
20598     case ISD::STORE: {
20599       // Get alias information for C.
20600       // TODO: Relax aliasing for unordered atomics (see D66309)
20601       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20602                       cast<LSBaseSDNode>(C.getNode())->isSimple();
20603       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20604         // Look further up the chain.
20605         C = C.getOperand(0);
20606         return true;
20607       }
20608       // Alias, so stop here.
20609       return false;
20610     }
20611
20612     case ISD::CopyFromReg:
20613       // Always forward past past CopyFromReg.
20614       C = C.getOperand(0);
20615       return true;
20616
20617     case ISD::LIFETIME_START:
20618     case ISD::LIFETIME_END: {
20619       // We can forward past any lifetime start/end that can be proven not to
20620       // alias the memory access.
20621       if (!isAlias(N, C.getNode())) {
20622         // Look further up the chain.
20623         C = C.getOperand(0);
20624         return true;
20625       }
20626       return false;
20627     }
20628     default:
20629       return false;
20630     }
20631   };
20632
20633   // Look at each chain and determine if it is an alias.  If so, add it to the
20634   // aliases list.  If not, then continue up the chain looking for the next
20635   // candidate.
20636   while (!Chains.empty()) {
20637     SDValue Chain = Chains.pop_back_val();
20638
20639     // Don't bother if we've seen Chain before.
20640     if (!Visited.insert(Chain.getNode()).second)
20641       continue;
20642
20643     // For TokenFactor nodes, look at each operand and only continue up the
20644     // chain until we reach the depth limit.
20645     //
20646     // FIXME: The depth check could be made to return the last non-aliasing
20647     // chain we found before we hit a tokenfactor rather than the original
20648     // chain.
20649     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20650       Aliases.clear();
20651       Aliases.push_back(OriginalChain);
20652       return;
20653     }
20654
20655     if (Chain.getOpcode() == ISD::TokenFactor) {
20656       // We have to check each of the operands of the token factor for "small"
20657       // token factors, so we queue them up.  Adding the operands to the queue
20658       // (stack) in reverse order maintains the original order and increases the
20659       // likelihood that getNode will find a matching token factor (CSE.)
20660       if (Chain.getNumOperands() > 16) {
20661         Aliases.push_back(Chain);
20662         continue;
20663       }
20664       for (unsigned n = Chain.getNumOperands(); n;)
20665         Chains.push_back(Chain.getOperand(--n));
20666       ++Depth;
20667       continue;
20668     }
20669     // Everything else
20670     if (ImproveChain(Chain)) {
20671       // Updated Chain Found, Consider new chain if one exists.
20672       if (Chain.getNode())
20673         Chains.push_back(Chain);
20674       ++Depth;
20675       continue;
20676     }
20677     // No Improved Chain Possible, treat as Alias.
20678     Aliases.push_back(Chain);
20679   }
20680 }
20681
20682 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20683 /// (aliasing node.)
20684 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20685   if (OptLevel == CodeGenOpt::None)
20686     return OldChain;
20687
20688   // Ops for replacing token factor.
20689   SmallVector<SDValue, 8> Aliases;
20690
20691   // Accumulate all the aliases to this node.
20692   GatherAllAliases(N, OldChain, Aliases);
20693
20694   // If no operands then chain to entry token.
20695   if (Aliases.size() == 0)
20696     return DAG.getEntryNode();
20697
20698   // If a single operand then chain to it.  We don't need to revisit it.
20699   if (Aliases.size() == 1)
20700     return Aliases[0];
20701
20702   // Construct a custom tailored token factor.
20703   return DAG.getTokenFactor(SDLoc(N), Aliases);
20704 }
20705
namespace {
// TODO: Replace with std::monostate when we move to C++17.
/// Empty value type: it carries no state, so any two instances compare equal.
struct UnitT {};

/// The instance of UnitT used wherever a value of that type is needed.
UnitT Unit;

bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
} // namespace
20712
20713 // This function tries to collect a bunch of potentially interesting
20714 // nodes to improve the chains of, all at once. This might seem
20715 // redundant, as this function gets called when visiting every store
20716 // node, so why not let the work be done on each store as it's visited?
20717 //
20718 // I believe this is mainly important because MergeConsecutiveStores
20719 // is unable to deal with merging stores of different sizes, so unless
20720 // we improve the chains of all the potential candidates up-front
20721 // before running MergeConsecutiveStores, it might only see some of
20722 // the nodes that will eventually be candidates, and then not be able
20723 // to go from a partially-merged state to the desired final
20724 // fully-merged state.
20725
20726 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
20727   SmallVector<StoreSDNode *, 8> ChainedStores;
20728   StoreSDNode *STChain = St;
20729   // Intervals records which offsets from BaseIndex have been covered. In
20730   // the common case, every store writes to the immediately previous address
20731   // space and thus merged with the previous interval at insertion time.
20732
20733   using IMap =
20734       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
20735   IMap::Allocator A;
20736   IMap Intervals(A);
20737
20738   // This holds the base pointer, index, and the offset in bytes from the base
20739   // pointer.
20740   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20741
20742   // We must have a base and an offset.
20743   if (!BasePtr.getBase().getNode())
20744     return false;
20745
20746   // Do not handle stores to undef base pointers.
20747   if (BasePtr.getBase().isUndef())
20748     return false;
20749
20750   // Add ST's interval.
20751   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
20752
20753   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
20754     // If the chain has more than one use, then we can't reorder the mem ops.
20755     if (!SDValue(Chain, 0)->hasOneUse())
20756       break;
20757     // TODO: Relax for unordered atomics (see D66309)
20758     if (!Chain->isSimple() || Chain->isIndexed())
20759       break;
20760
20761     // Find the base pointer and offset for this memory node.
20762     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
20763     // Check that the base pointer is the same as the original one.
20764     int64_t Offset;
20765     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
20766       break;
20767     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
20768     // Make sure we don't overlap with other intervals by checking the ones to
20769     // the left or right before inserting.
20770     auto I = Intervals.find(Offset);
20771     // If there's a next interval, we should end before it.
20772     if (I != Intervals.end() && I.start() < (Offset + Length))
20773       break;
20774     // If there's a previous interval, we should start after it.
20775     if (I != Intervals.begin() && (--I).stop() <= Offset)
20776       break;
20777     Intervals.insert(Offset, Offset + Length, Unit);
20778
20779     ChainedStores.push_back(Chain);
20780     STChain = Chain;
20781   }
20782
20783   // If we didn't find a chained store, exit.
20784   if (ChainedStores.size() == 0)
20785     return false;
20786
20787   // Improve all chained stores (St and ChainedStores members) starting from
20788   // where the store chain ended and return single TokenFactor.
20789   SDValue NewChain = STChain->getChain();
20790   SmallVector<SDValue, 8> TFOps;
20791   for (unsigned I = ChainedStores.size(); I;) {
20792     StoreSDNode *S = ChainedStores[--I];
20793     SDValue BetterChain = FindBetterChain(S, NewChain);
20794     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
20795         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
20796     TFOps.push_back(SDValue(S, 0));
20797     ChainedStores[I] = S;
20798   }
20799
20800   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20801   SDValue BetterChain = FindBetterChain(St, NewChain);
20802   SDValue NewST;
20803   if (St->isTruncatingStore())
20804     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
20805                               St->getBasePtr(), St->getMemoryVT(),
20806                               St->getMemOperand());
20807   else
20808     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
20809                          St->getBasePtr(), St->getMemOperand());
20810
20811   TFOps.push_back(NewST);
20812
20813   // If we improved every element of TFOps, then we've lost the dependence on
20814   // NewChain to successors of St and we need to add it back to TFOps. Do so at
20815   // the beginning to keep relative order consistent with FindBetterChains.
20816   auto hasImprovedChain = [&](SDValue ST) -> bool {
20817     return ST->getOperand(0) != NewChain;
20818   };
20819   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
20820   if (AddNewChain)
20821     TFOps.insert(TFOps.begin(), NewChain);
20822
20823   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
20824   CombineTo(St, TF);
20825
20826   // Add TF and its operands to the worklist.
20827   AddToWorklist(TF.getNode());
20828   for (const SDValue &Op : TF->ops())
20829     AddToWorklist(Op.getNode());
20830   AddToWorklist(STChain);
20831   return true;
20832 }
20833
20834 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20835   if (OptLevel == CodeGenOpt::None)
20836     return false;
20837
20838   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20839
20840   // We must have a base and an offset.
20841   if (!BasePtr.getBase().getNode())
20842     return false;
20843
20844   // Do not handle stores to undef base pointers.
20845   if (BasePtr.getBase().isUndef())
20846     return false;
20847
20848   // Directly improve a chain of disjoint stores starting at St.
20849   if (parallelizeChainedStores(St))
20850     return true;
20851
20852   // Improve St's Chain..
20853   SDValue BetterChain = FindBetterChain(St, St->getChain());
20854   if (St->getChain() != BetterChain) {
20855     replaceStoreChain(St, BetterChain);
20856     return true;
20857   }
20858   return false;
20859 }
20860
20861 /// This is the entry point for the file.
20862 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
20863                            CodeGenOpt::Level OptLevel) {
20864   /// This is the main entry point to this class.
20865   DAGCombiner(*this, AA, OptLevel).Run(Level);
20866 }