lib/CodeGen/SelectionDAG/DAGCombiner.cpp

   1 //===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
  10 // both before and after the DAG is legalized.
  11 //
  12 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
  13 // primarily intended to handle simplification opportunities that are implicit
  14 // in the LLVM IR and exposed by the various codegen lowering phases.
  15 //
  16 //===----------------------------------------------------------------------===//
  17
  18 #include "llvm/ADT/APFloat.h"
  19 #include "llvm/ADT/APInt.h"
  20 #include "llvm/ADT/ArrayRef.h"
  21 #include "llvm/ADT/DenseMap.h"
  22 #include "llvm/ADT/IntervalMap.h"
  23 #include "llvm/ADT/None.h"
  24 #include "llvm/ADT/Optional.h"
  25 #include "llvm/ADT/STLExtras.h"
  26 #include "llvm/ADT/SetVector.h"
  27 #include "llvm/ADT/SmallPtrSet.h"
  28 #include "llvm/ADT/SmallSet.h"
  29 #include "llvm/ADT/SmallVector.h"
  30 #include "llvm/ADT/Statistic.h"
  31 #include "llvm/Analysis/AliasAnalysis.h"
  32 #include "llvm/Analysis/MemoryLocation.h"
  33 #include "llvm/CodeGen/DAGCombine.h"
  34 #include "llvm/CodeGen/ISDOpcodes.h"
  35 #include "llvm/CodeGen/MachineFrameInfo.h"
  36 #include "llvm/CodeGen/MachineFunction.h"
  37 #include "llvm/CodeGen/MachineMemOperand.h"
  38 #include "llvm/CodeGen/RuntimeLibcalls.h"
  39 #include "llvm/CodeGen/SelectionDAG.h"
  40 #include "llvm/CodeGen/SelectionDAGAddressAnalysis.h"
  41 #include "llvm/CodeGen/SelectionDAGNodes.h"
  42 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
  43 #include "llvm/CodeGen/TargetLowering.h"
  44 #include "llvm/CodeGen/TargetRegisterInfo.h"
  45 #include "llvm/CodeGen/TargetSubtargetInfo.h"
  46 #include "llvm/CodeGen/ValueTypes.h"
  47 #include "llvm/IR/Attributes.h"
  48 #include "llvm/IR/Constant.h"
  49 #include "llvm/IR/DataLayout.h"
  50 #include "llvm/IR/DerivedTypes.h"
  51 #include "llvm/IR/Function.h"
  52 #include "llvm/IR/LLVMContext.h"
  53 #include "llvm/IR/Metadata.h"
  54 #include "llvm/Support/Casting.h"
  55 #include "llvm/Support/CodeGen.h"
  56 #include "llvm/Support/CommandLine.h"
  57 #include "llvm/Support/Compiler.h"
  58 #include "llvm/Support/Debug.h"
  59 #include "llvm/Support/ErrorHandling.h"
  60 #include "llvm/Support/KnownBits.h"
  61 #include "llvm/Support/MachineValueType.h"
  62 #include "llvm/Support/MathExtras.h"
  63 #include "llvm/Support/raw_ostream.h"
  64 #include "llvm/Target/TargetMachine.h"
  65 #include "llvm/Target/TargetOptions.h"
  66 #include <algorithm>
  67 #include <cassert>
  68 #include <cstdint>
  69 #include <functional>
  70 #include <iterator>
  71 #include <string>
  72 #include <tuple>
  73 #include <utility>
  74
  75 using namespace llvm;
  76
  77 #define DEBUG_TYPE "dagcombine"
  78
  79 STATISTIC(NodesCombined   , "Number of dag nodes combined");
  80 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
  81 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
  82 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
  83 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
  84 STATISTIC(SlicedLoads, "Number of load sliced");
  85 STATISTIC(NumFPLogicOpsConv, "Number of logic ops converted to fp ops");
  86
  87 static cl::opt<bool>
  88 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
  89                  cl::desc("Enable DAG combiner's use of IR alias analysis"));
  90
  91 static cl::opt<bool>
  92 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
  93         cl::desc("Enable DAG combiner's use of TBAA"));
  94
  95 #ifndef NDEBUG
  96 static cl::opt<std::string>
  97 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
  98                    cl::desc("Only use DAG-combiner alias analysis in this"
  99                             " function"));
 100 #endif
 101
 102 /// Hidden option to stress test load slicing, i.e., when this option
 103 /// is enabled, load slicing bypasses most of its profitability guards.
 104 static cl::opt<bool>
 105 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
 106                   cl::desc("Bypass the profitability model of load slicing"),
 107                   cl::init(false));
 108
 109 static cl::opt<bool>
 110   MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
 111                     cl::desc("DAG combiner may split indexing from loads"));
 112
 113 static cl::opt<bool>
 114     EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
 115                        cl::desc("DAG combiner enable merging multiple stores "
 116                                 "into a wider store"));
 117
 118 static cl::opt<unsigned> TokenFactorInlineLimit(
 119     "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
 120     cl::desc("Limit the number of operands to inline for Token Factors"));
 121
 122 static cl::opt<unsigned> StoreMergeDependenceLimit(
 123     "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
 124     cl::desc("Limit the number of times for the same StoreNode and RootNode "
 125              "to bail out in store merging dependence check"));
 126
 127 namespace {
 128
 129   class DAGCombiner {
  130     SelectionDAG &DAG;  // The DAG this combiner operates on.
  131     const TargetLowering &TLI;  // Taken from the DAG at construction.
  132     CombineLevel Level;  // Starts out as BeforeLegalizeTypes.
  133     CodeGenOpt::Level OptLevel;  // Optimization level given at construction.
  134     bool LegalOperations = false;  // If set, hasOperation() rejects custom ops.
  135     bool LegalTypes = false;  // If set, isTypeLegal() consults the target.
  136     bool ForCodeSize;  // hasOptSize() of the function being compiled.
  137
  138     /// Worklist of all of the nodes that need to be simplified.
  139     ///
  140     /// This must behave as a stack -- new nodes to process are pushed onto the
  141     /// back and when processing we pop off of the back.
  142     ///
  143     /// The worklist will not contain duplicates but may contain null entries
  144     /// due to nodes being deleted from the underlying DAG.
  145     SmallVector<SDNode *, 64> Worklist;
  146
  147     /// Mapping from an SDNode to its position on the worklist.
  148     ///
  149     /// This is used to find and remove nodes from the worklist (by nulling
  150     /// them) when they are deleted from the underlying DAG. It relies on
  151     /// stable indices of nodes within the worklist.
  152     DenseMap<SDNode *, unsigned> WorklistMap;
  153     /// This records all nodes we attempted to add to the worklist since we
  154     /// considered a new worklist entry. Because duplicate nodes are not added
  155     /// to the worklist, this is different from the tail of the worklist.
  156     SmallSetVector<SDNode *, 32> PruningList;
  157
  158     /// Set of nodes which have been combined (at least once).
  159     ///
  160     /// This is used to allow us to reliably add any operands of a DAG node
  161     /// which have not yet been combined to the worklist.
  162     SmallPtrSet<SDNode *, 32> CombinedNodes;
  163
  164     /// Map from candidate StoreNode to the pair of RootNode and count.
  165     /// The count is used to track how many times we have seen the StoreNode
  166     /// with the same RootNode bail out in the dependence check. If we have seen
  167     /// the bail out for the same pair more times than a limit allows, we won't
  168     /// consider the StoreNode with the same RootNode as a store merging
  169     /// candidate again.
  170     DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
  171
  172     // AA - Used for DAG load/store alias analysis.
  173     AliasAnalysis *AA;
 174
 175     /// When an instruction is simplified, add all users of the instruction to
 176     /// the work lists because they might get more simplified now.
 177     void AddUsersToWorklist(SDNode *N) {
 178       for (SDNode *Node : N->uses())
 179         AddToWorklist(Node);
 180     }
 181
 182     // Prune potentially dangling nodes. This is called after
 183     // any visit to a node, but should also be called during a visit after any
 184     // failed combine which may have created a DAG node.
 185     void clearAddedDanglingWorklistEntries() {
 186       // Check any nodes added to the worklist to see if they are prunable.
 187       while (!PruningList.empty()) {
 188         auto *N = PruningList.pop_back_val();
 189         if (N->use_empty())
 190           recursivelyDeleteUnusedNodes(N);
 191       }
 192     }
 193
 194     SDNode *getNextWorklistEntry() {
 195       // Before we do any work, remove nodes that are not in use.
 196       clearAddedDanglingWorklistEntries();
 197       SDNode *N = nullptr;
 198       // The Worklist holds the SDNodes in order, but it may contain null
 199       // entries.
 200       while (!N && !Worklist.empty()) {
 201         N = Worklist.pop_back_val();
 202       }
 203
 204       if (N) {
 205         bool GoodWorklistEntry = WorklistMap.erase(N);
 206         (void)GoodWorklistEntry;
 207         assert(GoodWorklistEntry &&
 208                "Found a worklist entry without a corresponding map entry!");
 209       }
 210       return N;
 211     }
 212
 213     /// Call the node-specific routine that folds each particular type of node.
 214     SDValue visit(SDNode *N);
 215
 216   public:
 217     DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
 218         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
 219           OptLevel(OL), AA(AA) {
 220       ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
 221
 222       MaximumLegalStoreInBits = 0;
 223       for (MVT VT : MVT::all_valuetypes())
 224         if (EVT(VT).isSimple() && VT != MVT::Other &&
 225             TLI.isTypeLegal(EVT(VT)) &&
 226             VT.getSizeInBits() >= MaximumLegalStoreInBits)
 227           MaximumLegalStoreInBits = VT.getSizeInBits();
 228     }
 229
  230     void ConsiderForPruning(SDNode *N) {
  231       // Mark N for potential pruning; it is deleted later if found unused.
  232       PruningList.insert(N);
  233     }
 234
 235     /// Add to the worklist making sure its instance is at the back (next to be
 236     /// processed.)
 237     void AddToWorklist(SDNode *N) {
 238       assert(N->getOpcode() != ISD::DELETED_NODE &&
 239              "Deleted Node added to Worklist");
 240
 241       // Skip handle nodes as they can't usefully be combined and confuse the
 242       // zero-use deletion strategy.
 243       if (N->getOpcode() == ISD::HANDLENODE)
 244         return;
 245
 246       ConsiderForPruning(N);
 247
 248       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
 249         Worklist.push_back(N);
 250     }
 251
 252     /// Remove all instances of N from the worklist.
 253     void removeFromWorklist(SDNode *N) {
 254       CombinedNodes.erase(N);
 255       PruningList.remove(N);
 256       StoreRootCountMap.erase(N);
 257
 258       auto It = WorklistMap.find(N);
 259       if (It == WorklistMap.end())
 260         return; // Not in the worklist.
 261
 262       // Null out the entry rather than erasing it to avoid a linear operation.
 263       Worklist[It->second] = nullptr;
 264       WorklistMap.erase(It);
 265     }
 266
 267     void deleteAndRecombine(SDNode *N);
 268     bool recursivelyDeleteUnusedNodes(SDNode *N);
 269
 270     /// Replaces all uses of the results of one DAG node with new values.
 271     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
 272                       bool AddTo = true);
 273
 274     /// Replaces all uses of the results of one DAG node with new values.
 275     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
 276       return CombineTo(N, &Res, 1, AddTo);
 277     }
 278
 279     /// Replaces all uses of the results of one DAG node with new values.
 280     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
 281                       bool AddTo = true) {
 282       SDValue To[] = { Res0, Res1 };
 283       return CombineTo(N, To, 2, AddTo);
 284     }
 285
 286     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
 287
 288   private:
  289     unsigned MaximumLegalStoreInBits; ///< Widest legal value type, in bits.
 290
 291     /// Check the specified integer node value to see if it can be simplified or
 292     /// if things it uses can be simplified by bit propagation.
 293     /// If so, return true.
 294     bool SimplifyDemandedBits(SDValue Op) {
 295       unsigned BitWidth = Op.getScalarValueSizeInBits();
 296       APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
 297       return SimplifyDemandedBits(Op, DemandedBits);
 298     }
 299
 300     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
 301       EVT VT = Op.getValueType();
 302       unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
 303       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 304       return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
 305     }
 306
 307     /// Check the specified vector node value to see if it can be simplified or
 308     /// if things it uses can be simplified as it only uses some of the
 309     /// elements. If so, return true.
 310     bool SimplifyDemandedVectorElts(SDValue Op) {
 311       unsigned NumElts = Op.getValueType().getVectorNumElements();
 312       APInt DemandedElts = APInt::getAllOnesValue(NumElts);
 313       return SimplifyDemandedVectorElts(Op, DemandedElts);
 314     }
 315
 316     bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
 317                               const APInt &DemandedElts);
 318     bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
 319                                     bool AssumeSingleUse = false);
 320
 321     bool CombineToPreIndexedLoadStore(SDNode *N);
 322     bool CombineToPostIndexedLoadStore(SDNode *N);
 323     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
 324     bool SliceUpLoad(SDNode *N);
 325
 326     // Scalars have size 0 to distinguish from singleton vectors.
 327     SDValue ForwardStoreValueToDirectLoad(LoadSDNode *LD);
 328     bool getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val);
 329     bool extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val);
 330
 331     /// Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
 332     ///   load.
 333     ///
 334     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
 335     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
 336     /// \param EltNo index of the vector element to load.
 337     /// \param OriginalLoad load that EVE came from to be replaced.
  338     /// \returns EVE on success, SDValue() on failure.
 339     SDValue scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
 340                                          SDValue EltNo,
 341                                          LoadSDNode *OriginalLoad);
 342     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
 343     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
 344     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
 345     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
 346     SDValue PromoteIntBinOp(SDValue Op);
 347     SDValue PromoteIntShiftOp(SDValue Op);
 348     SDValue PromoteExtend(SDValue Op);
 349     bool PromoteLoad(SDValue Op);
 350
 351     /// Call the node-specific routine that knows how to fold each
 352     /// particular type of node. If that doesn't do anything, try the
 353     /// target-specific DAG combines.
 354     SDValue combine(SDNode *N);
 355
 356     // Visitation implementation - Implement dag node combining for different
 357     // node types.  The semantics are as follows:
 358     // Return Value:
 359     //   SDValue.getNode() == 0 - No change was made
 360     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
 361     //   otherwise              - N should be replaced by the returned Operand.
 362     //
 363     SDValue visitTokenFactor(SDNode *N);
 364     SDValue visitMERGE_VALUES(SDNode *N);
 365     SDValue visitADD(SDNode *N);
 366     SDValue visitADDLike(SDNode *N);
 367     SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
 368     SDValue visitSUB(SDNode *N);
 369     SDValue visitADDSAT(SDNode *N);
 370     SDValue visitSUBSAT(SDNode *N);
 371     SDValue visitADDC(SDNode *N);
 372     SDValue visitADDO(SDNode *N);
 373     SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
 374     SDValue visitSUBC(SDNode *N);
 375     SDValue visitSUBO(SDNode *N);
 376     SDValue visitADDE(SDNode *N);
 377     SDValue visitADDCARRY(SDNode *N);
 378     SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
 379     SDValue visitSUBE(SDNode *N);
 380     SDValue visitSUBCARRY(SDNode *N);
 381     SDValue visitMUL(SDNode *N);
 382     SDValue visitMULFIX(SDNode *N);
 383     SDValue useDivRem(SDNode *N);
 384     SDValue visitSDIV(SDNode *N);
 385     SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
 386     SDValue visitUDIV(SDNode *N);
 387     SDValue visitUDIVLike(SDValue N0, SDValue N1, SDNode *N);
 388     SDValue visitREM(SDNode *N);
 389     SDValue visitMULHU(SDNode *N);
 390     SDValue visitMULHS(SDNode *N);
 391     SDValue visitSMUL_LOHI(SDNode *N);
 392     SDValue visitUMUL_LOHI(SDNode *N);
 393     SDValue visitMULO(SDNode *N);
 394     SDValue visitIMINMAX(SDNode *N);
 395     SDValue visitAND(SDNode *N);
 396     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
 397     SDValue visitOR(SDNode *N);
 398     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *N);
 399     SDValue visitXOR(SDNode *N);
 400     SDValue SimplifyVBinOp(SDNode *N);
 401     SDValue visitSHL(SDNode *N);
 402     SDValue visitSRA(SDNode *N);
 403     SDValue visitSRL(SDNode *N);
 404     SDValue visitFunnelShift(SDNode *N);
 405     SDValue visitRotate(SDNode *N);
 406     SDValue visitABS(SDNode *N);
 407     SDValue visitBSWAP(SDNode *N);
 408     SDValue visitBITREVERSE(SDNode *N);
 409     SDValue visitCTLZ(SDNode *N);
 410     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
 411     SDValue visitCTTZ(SDNode *N);
 412     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
 413     SDValue visitCTPOP(SDNode *N);
 414     SDValue visitSELECT(SDNode *N);
 415     SDValue visitVSELECT(SDNode *N);
 416     SDValue visitSELECT_CC(SDNode *N);
 417     SDValue visitSETCC(SDNode *N);
 418     SDValue visitSETCCCARRY(SDNode *N);
 419     SDValue visitSIGN_EXTEND(SDNode *N);
 420     SDValue visitZERO_EXTEND(SDNode *N);
 421     SDValue visitANY_EXTEND(SDNode *N);
 422     SDValue visitAssertExt(SDNode *N);
 423     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
 424     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
 425     SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
 426     SDValue visitTRUNCATE(SDNode *N);
 427     SDValue visitBITCAST(SDNode *N);
 428     SDValue visitBUILD_PAIR(SDNode *N);
 429     SDValue visitFADD(SDNode *N);
 430     SDValue visitFSUB(SDNode *N);
 431     SDValue visitFMUL(SDNode *N);
 432     SDValue visitFMA(SDNode *N);
 433     SDValue visitFDIV(SDNode *N);
 434     SDValue visitFREM(SDNode *N);
 435     SDValue visitFSQRT(SDNode *N);
 436     SDValue visitFCOPYSIGN(SDNode *N);
 437     SDValue visitFPOW(SDNode *N);
 438     SDValue visitSINT_TO_FP(SDNode *N);
 439     SDValue visitUINT_TO_FP(SDNode *N);
 440     SDValue visitFP_TO_SINT(SDNode *N);
 441     SDValue visitFP_TO_UINT(SDNode *N);
 442     SDValue visitFP_ROUND(SDNode *N);
 443     SDValue visitFP_ROUND_INREG(SDNode *N);
 444     SDValue visitFP_EXTEND(SDNode *N);
 445     SDValue visitFNEG(SDNode *N);
 446     SDValue visitFABS(SDNode *N);
 447     SDValue visitFCEIL(SDNode *N);
 448     SDValue visitFTRUNC(SDNode *N);
 449     SDValue visitFFLOOR(SDNode *N);
 450     SDValue visitFMINNUM(SDNode *N);
 451     SDValue visitFMAXNUM(SDNode *N);
 452     SDValue visitFMINIMUM(SDNode *N);
 453     SDValue visitFMAXIMUM(SDNode *N);
 454     SDValue visitBRCOND(SDNode *N);
 455     SDValue visitBR_CC(SDNode *N);
 456     SDValue visitLOAD(SDNode *N);
 457
 458     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
 459     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
 460
 461     SDValue visitSTORE(SDNode *N);
 462     SDValue visitLIFETIME_END(SDNode *N);
 463     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
 464     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
 465     SDValue visitBUILD_VECTOR(SDNode *N);
 466     SDValue visitCONCAT_VECTORS(SDNode *N);
 467     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
 468     SDValue visitVECTOR_SHUFFLE(SDNode *N);
 469     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
 470     SDValue visitINSERT_SUBVECTOR(SDNode *N);
 471     SDValue visitMLOAD(SDNode *N);
 472     SDValue visitMSTORE(SDNode *N);
 473     SDValue visitMGATHER(SDNode *N);
 474     SDValue visitMSCATTER(SDNode *N);
 475     SDValue visitFP_TO_FP16(SDNode *N);
 476     SDValue visitFP16_TO_FP(SDNode *N);
 477     SDValue visitVECREDUCE(SDNode *N);
 478
 479     SDValue visitFADDForFMACombine(SDNode *N);
 480     SDValue visitFSUBForFMACombine(SDNode *N);
 481     SDValue visitFMULForFMADistributiveCombine(SDNode *N);
 482
 483     SDValue XformToShuffleWithZero(SDNode *N);
 484     bool reassociationCanBreakAddressingModePattern(unsigned Opc,
 485                                                     const SDLoc &DL, SDValue N0,
 486                                                     SDValue N1);
 487     SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
 488                                       SDValue N1);
 489     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
 490                            SDValue N1, SDNodeFlags Flags);
 491
 492     SDValue visitShiftByConstant(SDNode *N);
 493
 494     SDValue foldSelectOfConstants(SDNode *N);
 495     SDValue foldVSelectOfConstants(SDNode *N);
 496     SDValue foldBinOpIntoSelect(SDNode *BO);
 497     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
 498     SDValue hoistLogicOpWithSameOpcodeHands(SDNode *N);
 499     SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
 500     SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
 501                              SDValue N2, SDValue N3, ISD::CondCode CC,
 502                              bool NotExtCompare = false);
 503     SDValue convertSelectOfFPConstantsToLoadOffset(
 504         const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
 505         ISD::CondCode CC);
 506     SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
 507                                    SDValue N2, SDValue N3, ISD::CondCode CC);
 508     SDValue foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
 509                               const SDLoc &DL);
 510     SDValue unfoldMaskedMerge(SDNode *N);
 511     SDValue unfoldExtremeBitClearingToShifts(SDNode *N);
 512     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
 513                           const SDLoc &DL, bool foldBooleans);
 514     SDValue rebuildSetCC(SDValue N);
 515
 516     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
 517                            SDValue &CC) const;
 518     bool isOneUseSetCC(SDValue N) const;
 519     bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
 520
 521     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
 522                                          unsigned HiOp);
 523     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
 524     SDValue CombineExtLoad(SDNode *N);
 525     SDValue CombineZExtLogicopShiftLoad(SDNode *N);
 526     SDValue combineRepeatedFPDivisors(SDNode *N);
 527     SDValue combineInsertEltToShuffle(SDNode *N, unsigned InsIndex);
 528     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
 529     SDValue BuildSDIV(SDNode *N);
 530     SDValue BuildSDIVPow2(SDNode *N);
 531     SDValue BuildUDIV(SDNode *N);
 532     SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
 533     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
 534     SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
 535     SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
 536     SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
 537     SDValue buildSqrtNROneConst(SDValue Arg, SDValue Est, unsigned Iterations,
 538                                 SDNodeFlags Flags, bool Reciprocal);
 539     SDValue buildSqrtNRTwoConst(SDValue Arg, SDValue Est, unsigned Iterations,
 540                                 SDNodeFlags Flags, bool Reciprocal);
 541     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 542                                bool DemandHighBits = true);
 543     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
 544     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
 545                               SDValue InnerPos, SDValue InnerNeg,
 546                               unsigned PosOpcode, unsigned NegOpcode,
 547                               const SDLoc &DL);
 548     SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
 549     SDValue MatchLoadCombine(SDNode *N);
 550     SDValue MatchStoreCombine(StoreSDNode *N);
 551     SDValue ReduceLoadWidth(SDNode *N);
 552     SDValue ReduceLoadOpStoreWidth(SDNode *N);
 553     SDValue splitMergedValStore(StoreSDNode *ST);
 554     SDValue TransformFPLoadStorePair(SDNode *N);
 555     SDValue convertBuildVecZextToZext(SDNode *N);
 556     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
 557     SDValue reduceBuildVecToShuffle(SDNode *N);
 558     SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
 559                                   ArrayRef<int> VectorMask, SDValue VecIn1,
 560                                   SDValue VecIn2, unsigned LeftIdx,
 561                                   bool DidSplitVec);
 562     SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
 563
 564     /// Walk up chain skipping non-aliasing memory nodes,
 565     /// looking for aliasing nodes and adding them to the Aliases vector.
 566     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
 567                           SmallVectorImpl<SDValue> &Aliases);
 568
 569     /// Return true if there is any possibility that the two addresses overlap.
 570     bool isAlias(SDNode *Op0, SDNode *Op1) const;
 571
 572     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
 573     /// chain (aliasing node.)
 574     SDValue FindBetterChain(SDNode *N, SDValue Chain);
 575
 576     /// Try to replace a store and any possibly adjacent stores on
 577     /// consecutive chains with better chains. Return true only if St is
 578     /// replaced.
 579     ///
 580     /// Notice that other chains may still be replaced even if the function
 581     /// returns false.
 582     bool findBetterNeighborChains(StoreSDNode *St);
 583
  584     // Helper for findBetterNeighborChains. Walk up the store chain, adding
  585     // additional chained stores that do not overlap and can be parallelized.
 586     bool parallelizeChainedStores(StoreSDNode *St);
 587
 588     /// Holds a pointer to an LSBaseSDNode as well as information on where it
 589     /// is located in a sequence of memory operations connected by a chain.
 590     struct MemOpLink {
 591       // Ptr to the mem node.
 592       LSBaseSDNode *MemNode;
 593
 594       // Offset from the base ptr.
 595       int64_t OffsetFromBase;
 596
 597       MemOpLink(LSBaseSDNode *N, int64_t Offset)
 598           : MemNode(N), OffsetFromBase(Offset) {}
 599     };
 600
 601     /// This is a helper function for visitMUL to check the profitability
 602     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
 603     /// MulNode is the original multiply, AddNode is (add x, c1),
 604     /// and ConstNode is c2.
 605     bool isMulAddWithConstProfitable(SDNode *MulNode,
 606                                      SDValue &AddNode,
 607                                      SDValue &ConstNode);
 608
 609     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
 610     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
 611     /// the type of the loaded value to be extended.
 612     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
 613                           EVT LoadResultTy, EVT &ExtVT);
 614
 615     /// Helper function to calculate whether the given Load/Store can have its
 616     /// width reduced to ExtVT.
 617     bool isLegalNarrowLdSt(LSBaseSDNode *LDSTN, ISD::LoadExtType ExtType,
 618                            EVT &MemVT, unsigned ShAmt = 0);
 619
 620     /// Used by BackwardsPropagateMask to find suitable loads.
 621     bool SearchForAndLoads(SDNode *N, SmallVectorImpl<LoadSDNode*> &Loads,
 622                            SmallPtrSetImpl<SDNode*> &NodesWithConsts,
 623                            ConstantSDNode *Mask, SDNode *&NodeToMask);
 624     /// Attempt to propagate a given AND node back to load leaves so that they
 625     /// can be combined into narrow loads.
 626     bool BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG);
 627
 628     /// Helper function for MergeConsecutiveStores which merges the
 629     /// component store chains.
 630     SDValue getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
 631                                 unsigned NumStores);
 632
 633     /// This is a helper function for MergeConsecutiveStores. When the
 634     /// source elements of the consecutive stores are all constants or
 635     /// all extracted vector elements, try to merge them into one
 636     /// larger store introducing bitcasts if necessary.  \return True
 637     /// if a merged store was created.
 638     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
 639                                          EVT MemVT, unsigned NumStores,
 640                                          bool IsConstantSrc, bool UseVector,
 641                                          bool UseTrunc);
 642
 643     /// This is a helper function for MergeConsecutiveStores. Stores
 644     /// that potentially may be merged with St are placed in
  645     /// StoreNodes. \p Root is a chain predecessor to all store
 646     /// candidates.
 647     void getStoreMergeCandidates(StoreSDNode *St,
 648                                  SmallVectorImpl<MemOpLink> &StoreNodes,
 649                                  SDNode *&Root);
 650
 651     /// Helper function for MergeConsecutiveStores. Checks if
 652     /// candidate stores have indirect dependency through their
 653     /// operands. RootNode is the predecessor to all stores calculated
 654     /// by getStoreMergeCandidates and is used to prune the dependency check.
 655     /// \return True if safe to merge.
 656     bool checkMergeStoreCandidatesForDependencies(
 657         SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
 658         SDNode *RootNode);
 659
 660     /// Merge consecutive store operations into a wide store.
 661     /// This optimization uses wide integers or vectors when possible.
  662     /// \return true if stores were merged into a wider store, false
  663     /// otherwise.
 664     bool MergeConsecutiveStores(StoreSDNode *St);
 665
    /// Try to transform a truncation of an AND with a constant C:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \param N the node to transform. The caller must guarantee that it is
    /// a truncation whose first operand is an AND. All other requirements
    /// (e.g. that trunc is single-use) are checked by the function itself.
    /// \return the transformed value, or an empty SDValue if one of the
    /// requirements is not met.
    SDValue distributeTruncateThroughAnd(SDNode *N);
 673
 674     /// Helper function to determine whether the target supports operation
 675     /// given by \p Opcode for type \p VT, that is, whether the operation
 676     /// is legal or custom before legalizing operations, and whether is
 677     /// legal (but not custom) after legalization.
 678     bool hasOperation(unsigned Opcode, EVT VT) {
 679       if (LegalOperations)
 680         return TLI.isOperationLegal(Opcode, VT);
 681       return TLI.isOperationLegalOrCustom(Opcode, VT);
 682     }
 683
 684   public:
    /// Runs the DAG combiner on all nodes in the work list.
    ///
    /// \param AtLevel the combine level to run at. NOTE(review): presumably
    /// this identifies which legalization phases have already taken place;
    /// confirm against the definition.
    void Run(CombineLevel AtLevel);
 687
 688     SelectionDAG &getDAG() const { return DAG; }
 689
 690     /// Returns a type large enough to hold any valid shift amount - before type
 691     /// legalization these can be huge.
 692     EVT getShiftAmountTy(EVT LHSTy) {
 693       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
 694       return TLI.getShiftAmountTy(LHSTy, DAG.getDataLayout(), LegalTypes);
 695     }
 696
 697     /// This method returns true if we are running before type legalization or
 698     /// if the specified VT is legal.
 699     bool isTypeLegal(const EVT &VT) {
 700       if (!LegalTypes) return true;
 701       return TLI.isTypeLegal(VT);
 702     }
 703
 704     /// Convenience wrapper around TargetLowering::getSetCCResultType
 705     EVT getSetCCResultType(EVT VT) const {
 706       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
 707     }
 708
    /// NOTE(review): the definition is not visible from here. Judging by the
    /// signature this presumably updates the setcc nodes listed in \p SetCCs,
    /// which used \p OrigLoad, so that they work on \p ExtLoad instead,
    /// extending their other operands according to \p ExtType; confirm
    /// against the definition before relying on this description.
    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue OrigLoad, SDValue ExtLoad,
                         ISD::NodeType ExtType);
 712   };
 713
 714 /// This class is a DAGUpdateListener that removes any deleted
 715 /// nodes from the worklist.
 716 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
 717   DAGCombiner &DC;
 718
 719 public:
 720   explicit WorklistRemover(DAGCombiner &dc)
 721     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 722
 723   void NodeDeleted(SDNode *N, SDNode *E) override {
 724     DC.removeFromWorklist(N);
 725   }
 726 };
 727
 728 class WorklistInserter : public SelectionDAG::DAGUpdateListener {
 729   DAGCombiner &DC;
 730
 731 public:
 732   explicit WorklistInserter(DAGCombiner &dc)
 733       : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
 734
 735   // FIXME: Ideally we could add N to the worklist, but this causes exponential
 736   //        compile time costs in large DAGs, e.g. Halide.
 737   void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
 738 };
 739
 740 } // end anonymous namespace
 741
 742 //===----------------------------------------------------------------------===//
 743 //  TargetLowering::DAGCombinerInfo implementation
 744 //===----------------------------------------------------------------------===//
 745
 746 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
 747   ((DAGCombiner*)DC)->AddToWorklist(N);
 748 }
 749
 750 SDValue TargetLowering::DAGCombinerInfo::
 751 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
 752   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
 753 }
 754
 755 SDValue TargetLowering::DAGCombinerInfo::
 756 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
 757   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
 758 }
 759
 760 SDValue TargetLowering::DAGCombinerInfo::
 761 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
 762   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
 763 }
 764
 765 void TargetLowering::DAGCombinerInfo::
 766 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
 767   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
 768 }
 769
 770 //===----------------------------------------------------------------------===//
 771 // Helper Functions
 772 //===----------------------------------------------------------------------===//
 773
 774 void DAGCombiner::deleteAndRecombine(SDNode *N) {
 775   removeFromWorklist(N);
 776
 777   // If the operands of this node are only used by the node, they will now be
 778   // dead. Make sure to re-visit them and recursively delete dead nodes.
 779   for (const SDValue &Op : N->ops())
 780     // For an operand generating multiple values, one of the values may
 781     // become dead allowing further simplification (e.g. split index
 782     // arithmetic from an indexed load).
 783     if (Op->hasOneUse() || Op->getNumValues() > 1)
 784       AddToWorklist(Op.getNode());
 785
 786   DAG.DeleteNode(N);
 787 }
 788
 789 /// Return 1 if we can compute the negated form of the specified expression for
 790 /// the same cost as the expression itself, or 2 if we can compute the negated
 791 /// form more cheaply than the expression itself.
 792 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
 793                                const TargetLowering &TLI,
 794                                const TargetOptions *Options,
 795                                bool ForCodeSize,
 796                                unsigned Depth = 0) {
 797   // fneg is removable even if it has multiple uses.
 798   if (Op.getOpcode() == ISD::FNEG)
 799     return 2;
 800
 801   // Don't allow anything with multiple uses unless we know it is free.
 802   EVT VT = Op.getValueType();
 803   const SDNodeFlags Flags = Op->getFlags();
 804   if (!Op.hasOneUse() &&
 805       !(Op.getOpcode() == ISD::FP_EXTEND &&
 806         TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
 807     return 0;
 808
 809   // Don't recurse exponentially.
 810   if (Depth > 6)
 811     return 0;
 812
 813   switch (Op.getOpcode()) {
 814   default: return false;
 815   case ISD::ConstantFP: {
 816     if (!LegalOperations)
 817       return 1;
 818
 819     // Don't invert constant FP values after legalization unless the target says
 820     // the negated constant is legal.
 821     return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
 822            TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
 823                             ForCodeSize);
 824   }
 825   case ISD::BUILD_VECTOR: {
 826     // Only permit BUILD_VECTOR of constants.
 827     if (llvm::any_of(Op->op_values(), [&](SDValue N) {
 828           return !N.isUndef() && !isa<ConstantFPSDNode>(N);
 829         }))
 830       return 0;
 831     if (!LegalOperations)
 832       return 1;
 833     if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
 834         TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
 835       return 1;
 836     return llvm::all_of(Op->op_values(), [&](SDValue N) {
 837       return N.isUndef() ||
 838              TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
 839                               ForCodeSize);
 840     });
 841   }
 842   case ISD::FADD:
 843     if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 844       return 0;
 845
 846     // After operation legalization, it might not be legal to create new FSUBs.
 847     if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
 848       return 0;
 849
 850     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 851     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 852                                     Options, ForCodeSize, Depth + 1))
 853       return V;
 854     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 855     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 856                               ForCodeSize, Depth + 1);
 857   case ISD::FSUB:
 858     // We can't turn -(A-B) into B-A when we honor signed zeros.
 859     if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 860       return 0;
 861
 862     // fold (fneg (fsub A, B)) -> (fsub B, A)
 863     return 1;
 864
 865   case ISD::FMUL:
 866   case ISD::FDIV:
 867     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
 868     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 869                                     Options, ForCodeSize, Depth + 1))
 870       return V;
 871
 872     // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
 873     if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
 874       if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
 875         return 0;
 876
 877     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
 878                               ForCodeSize, Depth + 1);
 879
 880   case ISD::FMA:
 881   case ISD::FMAD: {
 882     if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
 883       return 0;
 884
 885     // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
 886     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
 887     char V2 = isNegatibleForFree(Op.getOperand(2), LegalOperations, TLI,
 888                                  Options, ForCodeSize, Depth + 1);
 889     if (!V2)
 890       return 0;
 891
 892     // One of Op0/Op1 must be cheaply negatible, then select the cheapest.
 893     char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
 894                                  Options, ForCodeSize, Depth + 1);
 895     char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI,
 896                                  Options, ForCodeSize, Depth + 1);
 897     char V01 = std::max(V0, V1);
 898     return V01 ? std::max(V01, V2) : 0;
 899   }
 900
 901   case ISD::FP_EXTEND:
 902   case ISD::FP_ROUND:
 903   case ISD::FSIN:
 904     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
 905                               ForCodeSize, Depth + 1);
 906   }
 907 }
 908
 909 /// If isNegatibleForFree returns true, return the newly negated expression.
 910 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
 911                                     bool LegalOperations, bool ForCodeSize,
 912                                     unsigned Depth = 0) {
 913   // fneg is removable even if it has multiple uses.
 914   if (Op.getOpcode() == ISD::FNEG)
 915     return Op.getOperand(0);
 916
 917   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
 918   const TargetOptions &Options = DAG.getTarget().Options;
 919   const SDNodeFlags Flags = Op->getFlags();
 920
 921   switch (Op.getOpcode()) {
 922   default: llvm_unreachable("Unknown code");
 923   case ISD::ConstantFP: {
 924     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
 925     V.changeSign();
 926     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
 927   }
 928   case ISD::BUILD_VECTOR: {
 929     SmallVector<SDValue, 4> Ops;
 930     for (SDValue C : Op->op_values()) {
 931       if (C.isUndef()) {
 932         Ops.push_back(C);
 933         continue;
 934       }
 935       APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
 936       V.changeSign();
 937       Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
 938     }
 939     return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
 940   }
 941   case ISD::FADD:
 942     assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
 943            "Expected NSZ fp-flag");
 944
 945     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
 946     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 947                            DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
 948                            Depth + 1))
 949       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 950                          GetNegatedExpression(Op.getOperand(0), DAG,
 951                                               LegalOperations, ForCodeSize,
 952                                               Depth + 1),
 953                          Op.getOperand(1), Flags);
 954     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
 955     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 956                        GetNegatedExpression(Op.getOperand(1), DAG,
 957                                             LegalOperations, ForCodeSize,
 958                                             Depth + 1),
 959                        Op.getOperand(0), Flags);
 960   case ISD::FSUB:
 961     // fold (fneg (fsub 0, B)) -> B
 962     if (ConstantFPSDNode *N0CFP =
 963             isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
 964       if (N0CFP->isZero())
 965         return Op.getOperand(1);
 966
 967     // fold (fneg (fsub A, B)) -> (fsub B, A)
 968     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
 969                        Op.getOperand(1), Op.getOperand(0), Flags);
 970
 971   case ISD::FMUL:
 972   case ISD::FDIV:
 973     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
 974     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
 975                            DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
 976                            Depth + 1))
 977       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 978                          GetNegatedExpression(Op.getOperand(0), DAG,
 979                                               LegalOperations, ForCodeSize,
 980                                               Depth + 1),
 981                          Op.getOperand(1), Flags);
 982
 983     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
 984     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
 985                        Op.getOperand(0),
 986                        GetNegatedExpression(Op.getOperand(1), DAG,
 987                                             LegalOperations, ForCodeSize,
 988                                             Depth + 1), Flags);
 989
 990   case ISD::FMA:
 991   case ISD::FMAD: {
 992     assert((Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
 993            "Expected NSZ fp-flag");
 994
 995     SDValue Neg2 = GetNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
 996                                         ForCodeSize, Depth + 1);
 997
 998     char V0 = isNegatibleForFree(Op.getOperand(0), LegalOperations,
 999                                  DAG.getTargetLoweringInfo(), &Options,
1000                                  ForCodeSize, Depth + 1);
1001     char V1 = isNegatibleForFree(Op.getOperand(1), LegalOperations,
1002                                  DAG.getTargetLoweringInfo(), &Options,
1003                                  ForCodeSize, Depth + 1);
1004     if (V0 >= V1) {
1005       // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
1006       SDValue Neg0 = GetNegatedExpression(
1007           Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
1008       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
1009                          Op.getOperand(1), Neg2, Flags);
1010     }
1011
1012     // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
1013     SDValue Neg1 = GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
1014                                         ForCodeSize, Depth + 1);
1015     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
1016                        Op.getOperand(0), Neg1, Neg2, Flags);
1017   }
1018
1019   case ISD::FP_EXTEND:
1020   case ISD::FSIN:
1021     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
1022                        GetNegatedExpression(Op.getOperand(0), DAG,
1023                                             LegalOperations, ForCodeSize,
1024                                             Depth + 1));
1025   case ISD::FP_ROUND:
1026     return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
1027                        GetNegatedExpression(Op.getOperand(0), DAG,
1028                                             LegalOperations, ForCodeSize,
1029                                             Depth + 1),
1030                        Op.getOperand(1));
1031   }
1032 }
1033
1034 // APInts must be the same size for most operations, this helper
1035 // function zero extends the shorter of the pair so that they match.
1036 // We provide an Offset so that we can create bitwidths that won't overflow.
1037 static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
1038   unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
1039   LHS = LHS.zextOrSelf(Bits);
1040   RHS = RHS.zextOrSelf(Bits);
1041 }
1042
1043 // Return true if this node is a setcc, or is a select_cc
1044 // that selects between the target values used for true and false, making it
1045 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
1046 // the appropriate nodes based on the type of node we are checking. This
1047 // simplifies life a bit for the callers.
1048 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
1049                                     SDValue &CC) const {
1050   if (N.getOpcode() == ISD::SETCC) {
1051     LHS = N.getOperand(0);
1052     RHS = N.getOperand(1);
1053     CC  = N.getOperand(2);
1054     return true;
1055   }
1056
1057   if (N.getOpcode() != ISD::SELECT_CC ||
1058       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
1059       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
1060     return false;
1061
1062   if (TLI.getBooleanContents(N.getValueType()) ==
1063       TargetLowering::UndefinedBooleanContent)
1064     return false;
1065
1066   LHS = N.getOperand(0);
1067   RHS = N.getOperand(1);
1068   CC  = N.getOperand(4);
1069   return true;
1070 }
1071
1072 /// Return true if this is a SetCC-equivalent operation with only one use.
1073 /// If this is true, it allows the users to invert the operation for free when
1074 /// it is profitable to do so.
1075 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
1076   SDValue N0, N1, N2;
1077   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
1078     return true;
1079   return false;
1080 }
1081
1082 // Returns the SDNode if it is a constant float BuildVector
1083 // or constant float.
1084 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
1085   if (isa<ConstantFPSDNode>(N))
1086     return N.getNode();
1087   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
1088     return N.getNode();
1089   return nullptr;
1090 }
1091
1092 // Determines if it is a constant integer or a build vector of constant
1093 // integers (and undefs).
1094 // Do not permit build vector implicit truncation.
1095 static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
1096   if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
1097     return !(Const->isOpaque() && NoOpaques);
1098   if (N.getOpcode() != ISD::BUILD_VECTOR)
1099     return false;
1100   unsigned BitWidth = N.getScalarValueSizeInBits();
1101   for (const SDValue &Op : N->op_values()) {
1102     if (Op.isUndef())
1103       continue;
1104     ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
1105     if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
1106         (Const->isOpaque() && NoOpaques))
1107       return false;
1108   }
1109   return true;
1110 }
1111
1112 // Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
1113 // undef's.
1114 static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
1115   if (V.getOpcode() != ISD::BUILD_VECTOR)
1116     return false;
1117   return isConstantOrConstantVector(V, NoOpaques) ||
1118          ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
1119 }
1120
1121 bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
1122                                                              const SDLoc &DL,
1123                                                              SDValue N0,
1124                                                              SDValue N1) {
1125   // Currently this only tries to ensure we don't undo the GEP splits done by
1126   // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
1127   // we check if the following transformation would be problematic:
1128   // (load/store (add, (add, x, offset1), offset2)) ->
1129   // (load/store (add, x, offset1+offset2)).
1130
1131   if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
1132     return false;
1133
1134   if (N0.hasOneUse())
1135     return false;
1136
1137   auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
1138   auto *C2 = dyn_cast<ConstantSDNode>(N1);
1139   if (!C1 || !C2)
1140     return false;
1141
1142   const APInt &C1APIntVal = C1->getAPIntValue();
1143   const APInt &C2APIntVal = C2->getAPIntValue();
1144   if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
1145     return false;
1146
1147   const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
1148   if (CombinedValueIntVal.getBitWidth() > 64)
1149     return false;
1150   const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
1151
1152   for (SDNode *Node : N0->uses()) {
1153     auto LoadStore = dyn_cast<MemSDNode>(Node);
1154     if (LoadStore) {
1155       // Is x[offset2] already not a legal addressing mode? If so then
1156       // reassociating the constants breaks nothing (we test offset2 because
1157       // that's the one we hope to fold into the load or store).
1158       TargetLoweringBase::AddrMode AM;
1159       AM.HasBaseReg = true;
1160       AM.BaseOffs = C2APIntVal.getSExtValue();
1161       EVT VT = LoadStore->getMemoryVT();
1162       unsigned AS = LoadStore->getAddressSpace();
1163       Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
1164       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1165         continue;
1166
1167       // Would x[offset1+offset2] still be a legal addressing mode?
1168       AM.BaseOffs = CombinedValue;
1169       if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
1170         return true;
1171     }
1172   }
1173
1174   return false;
1175 }
1176
1177 // Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
1178 // such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
1179 SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
1180                                                SDValue N0, SDValue N1) {
1181   EVT VT = N0.getValueType();
1182
1183   if (N0.getOpcode() != Opc)
1184     return SDValue();
1185
1186   // Don't reassociate reductions.
1187   if (N0->getFlags().hasVectorReduction())
1188     return SDValue();
1189
1190   if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
1191     if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
1192       // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
1193       if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
1194         return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
1195       return SDValue();
1196     }
1197     if (N0.hasOneUse()) {
1198       // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
1199       //              iff (op x, c1) has one use
1200       SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
1201       if (!OpNode.getNode())
1202         return SDValue();
1203       return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
1204     }
1205   }
1206   return SDValue();
1207 }
1208
1209 // Try to reassociate commutative binops.
1210 SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
1211                                     SDValue N1, SDNodeFlags Flags) {
1212   assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
1213   // Don't reassociate reductions.
1214   if (Flags.hasVectorReduction())
1215     return SDValue();
1216
1217   // Floating-point reassociation is not allowed without loose FP math.
1218   if (N0.getValueType().isFloatingPoint() ||
1219       N1.getValueType().isFloatingPoint())
1220     if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
1221       return SDValue();
1222
1223   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
1224     return Combined;
1225   if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
1226     return Combined;
1227   return SDValue();
1228 }
1229
1230 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
1231                                bool AddTo) {
1232   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
1233   ++NodesCombined;
1234   LLVM_DEBUG(dbgs() << "\nReplacing.1 "; N->dump(&DAG); dbgs() << "\nWith: ";
1235              To[0].getNode()->dump(&DAG);
1236              dbgs() << " and " << NumTo - 1 << " other values\n");
1237   for (unsigned i = 0, e = NumTo; i != e; ++i)
1238     assert((!To[i].getNode() ||
1239             N->getValueType(i) == To[i].getValueType()) &&
1240            "Cannot combine value to value of different type!");
1241
1242   WorklistRemover DeadNodes(*this);
1243   DAG.ReplaceAllUsesWith(N, To);
1244   if (AddTo) {
1245     // Push the new nodes and any users onto the worklist
1246     for (unsigned i = 0, e = NumTo; i != e; ++i) {
1247       if (To[i].getNode()) {
1248         AddToWorklist(To[i].getNode());
1249         AddUsersToWorklist(To[i].getNode());
1250       }
1251     }
1252   }
1253
1254   // Finally, if the node is now dead, remove it from the graph.  The node
1255   // may not be dead if the replacement process recursively simplified to
1256   // something else needing this node.
1257   if (N->use_empty())
1258     deleteAndRecombine(N);
1259   return SDValue(N, 0);
1260 }
1261
1262 void DAGCombiner::
1263 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
1264   // Replace all uses.  If any nodes become isomorphic to other nodes and
1265   // are deleted, make sure to remove them from our worklist.
1266   WorklistRemover DeadNodes(*this);
1267   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
1268
1269   // Push the new node and any (possibly new) users onto the worklist.
1270   AddToWorklist(TLO.New.getNode());
1271   AddUsersToWorklist(TLO.New.getNode());
1272
1273   // Finally, if the node is now dead, remove it from the graph.  The node
1274   // may not be dead if the replacement process recursively simplified to
1275   // something else needing this node.
1276   if (TLO.Old.getNode()->use_empty())
1277     deleteAndRecombine(TLO.Old.getNode());
1278 }
1279
1280 /// Check the specified integer node value to see if it can be simplified or if
1281 /// things it uses can be simplified by bit propagation. If so, return true.
1282 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
1283                                        const APInt &DemandedElts) {
1284   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1285   KnownBits Known;
1286   if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
1287     return false;
1288
1289   // Revisit the node.
1290   AddToWorklist(Op.getNode());
1291
1292   // Replace the old value with the new one.
1293   ++NodesCombined;
1294   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1295              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1296              dbgs() << '\n');
1297
1298   CommitTargetLoweringOpt(TLO);
1299   return true;
1300 }
1301
1302 /// Check the specified vector node value to see if it can be simplified or
1303 /// if things it uses can be simplified as it only uses some of the elements.
1304 /// If so, return true.
1305 bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
1306                                              const APInt &DemandedElts,
1307                                              bool AssumeSingleUse) {
1308   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
1309   APInt KnownUndef, KnownZero;
1310   if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
1311                                       TLO, 0, AssumeSingleUse))
1312     return false;
1313
1314   // Revisit the node.
1315   AddToWorklist(Op.getNode());
1316
1317   // Replace the old value with the new one.
1318   ++NodesCombined;
1319   LLVM_DEBUG(dbgs() << "\nReplacing.2 "; TLO.Old.getNode()->dump(&DAG);
1320              dbgs() << "\nWith: "; TLO.New.getNode()->dump(&DAG);
1321              dbgs() << '\n');
1322
1323   CommitTargetLoweringOpt(TLO);
1324   return true;
1325 }
1326
1327 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
1328   SDLoc DL(Load);
1329   EVT VT = Load->getValueType(0);
1330   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
1331
1332   LLVM_DEBUG(dbgs() << "\nReplacing.9 "; Load->dump(&DAG); dbgs() << "\nWith: ";
1333              Trunc.getNode()->dump(&DAG); dbgs() << '\n');
1334   WorklistRemover DeadNodes(*this);
1335   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
1336   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
1337   deleteAndRecombine(Load);
1338   AddToWorklist(Trunc.getNode());
1339 }
1340
1341 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
1342   Replace = false;
1343   SDLoc DL(Op);
1344   if (ISD::isUNINDEXEDLoad(Op.getNode())) {
1345     LoadSDNode *LD = cast<LoadSDNode>(Op);
1346     EVT MemVT = LD->getMemoryVT();
1347     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1348                                                       : LD->getExtensionType();
1349     Replace = true;
1350     return DAG.getExtLoad(ExtType, DL, PVT,
1351                           LD->getChain(), LD->getBasePtr(),
1352                           MemVT, LD->getMemOperand());
1353   }
1354
1355   unsigned Opc = Op.getOpcode();
1356   switch (Opc) {
1357   default: break;
1358   case ISD::AssertSext:
1359     if (SDValue Op0 = SExtPromoteOperand(Op.getOperand(0), PVT))
1360       return DAG.getNode(ISD::AssertSext, DL, PVT, Op0, Op.getOperand(1));
1361     break;
1362   case ISD::AssertZext:
1363     if (SDValue Op0 = ZExtPromoteOperand(Op.getOperand(0), PVT))
1364       return DAG.getNode(ISD::AssertZext, DL, PVT, Op0, Op.getOperand(1));
1365     break;
1366   case ISD::Constant: {
1367     unsigned ExtOpc =
1368       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1369     return DAG.getNode(ExtOpc, DL, PVT, Op);
1370   }
1371   }
1372
1373   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
1374     return SDValue();
1375   return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
1376 }
1377
1378 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
1379   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
1380     return SDValue();
1381   EVT OldVT = Op.getValueType();
1382   SDLoc DL(Op);
1383   bool Replace = false;
1384   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1385   if (!NewOp.getNode())
1386     return SDValue();
1387   AddToWorklist(NewOp.getNode());
1388
1389   if (Replace)
1390     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1391   return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
1392                      DAG.getValueType(OldVT));
1393 }
1394
1395 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
1396   EVT OldVT = Op.getValueType();
1397   SDLoc DL(Op);
1398   bool Replace = false;
1399   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
1400   if (!NewOp.getNode())
1401     return SDValue();
1402   AddToWorklist(NewOp.getNode());
1403
1404   if (Replace)
1405     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
1406   return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
1407 }
1408
1409 /// Promote the specified integer binary operation if the target indicates it is
1410 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1411 /// i32 since i16 instructions are longer.
1412 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
1413   if (!LegalOperations)
1414     return SDValue();
1415
1416   EVT VT = Op.getValueType();
1417   if (VT.isVector() || !VT.isInteger())
1418     return SDValue();
1419
1420   // If operation type is 'undesirable', e.g. i16 on x86, consider
1421   // promoting it.
1422   unsigned Opc = Op.getOpcode();
1423   if (TLI.isTypeDesirableForOp(Opc, VT))
1424     return SDValue();
1425
1426   EVT PVT = VT;
1427   // Consult target whether it is a good idea to promote this operation and
1428   // what's the right type to promote it to.
1429   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1430     assert(PVT != VT && "Don't know what type to promote to!");
1431
1432     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1433
1434     bool Replace0 = false;
1435     SDValue N0 = Op.getOperand(0);
1436     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
1437
1438     bool Replace1 = false;
1439     SDValue N1 = Op.getOperand(1);
1440     SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
1441     SDLoc DL(Op);
1442
1443     SDValue RV =
1444         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, NN0, NN1));
1445
1446     // We are always replacing N0/N1's use in N and only need
1447     // additional replacements if there are additional uses.
1448     Replace0 &= !N0->hasOneUse();
1449     Replace1 &= (N0 != N1) && !N1->hasOneUse();
1450
1451     // Combine Op here so it is preserved past replacements.
1452     CombineTo(Op.getNode(), RV);
1453
1454     // If operands have a use ordering, make sure we deal with
1455     // predecessor first.
1456     if (Replace0 && Replace1 && N0.getNode()->isPredecessorOf(N1.getNode())) {
1457       std::swap(N0, N1);
1458       std::swap(NN0, NN1);
1459     }
1460
1461     if (Replace0) {
1462       AddToWorklist(NN0.getNode());
1463       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
1464     }
1465     if (Replace1) {
1466       AddToWorklist(NN1.getNode());
1467       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
1468     }
1469     return Op;
1470   }
1471   return SDValue();
1472 }
1473
1474 /// Promote the specified integer shift operation if the target indicates it is
1475 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
1476 /// i32 since i16 instructions are longer.
1477 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
1478   if (!LegalOperations)
1479     return SDValue();
1480
1481   EVT VT = Op.getValueType();
1482   if (VT.isVector() || !VT.isInteger())
1483     return SDValue();
1484
1485   // If operation type is 'undesirable', e.g. i16 on x86, consider
1486   // promoting it.
1487   unsigned Opc = Op.getOpcode();
1488   if (TLI.isTypeDesirableForOp(Opc, VT))
1489     return SDValue();
1490
1491   EVT PVT = VT;
1492   // Consult target whether it is a good idea to promote this operation and
1493   // what's the right type to promote it to.
1494   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1495     assert(PVT != VT && "Don't know what type to promote to!");
1496
1497     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1498
1499     bool Replace = false;
1500     SDValue N0 = Op.getOperand(0);
1501     SDValue N1 = Op.getOperand(1);
1502     if (Opc == ISD::SRA)
1503       N0 = SExtPromoteOperand(N0, PVT);
1504     else if (Opc == ISD::SRL)
1505       N0 = ZExtPromoteOperand(N0, PVT);
1506     else
1507       N0 = PromoteOperand(N0, PVT, Replace);
1508
1509     if (!N0.getNode())
1510       return SDValue();
1511
1512     SDLoc DL(Op);
1513     SDValue RV =
1514         DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
1515
1516     if (Replace)
1517       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
1518
1519     // Deal with Op being deleted.
1520     if (Op && Op.getOpcode() != ISD::DELETED_NODE)
1521       return RV;
1522   }
1523   return SDValue();
1524 }
1525
1526 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
1527   if (!LegalOperations)
1528     return SDValue();
1529
1530   EVT VT = Op.getValueType();
1531   if (VT.isVector() || !VT.isInteger())
1532     return SDValue();
1533
1534   // If operation type is 'undesirable', e.g. i16 on x86, consider
1535   // promoting it.
1536   unsigned Opc = Op.getOpcode();
1537   if (TLI.isTypeDesirableForOp(Opc, VT))
1538     return SDValue();
1539
1540   EVT PVT = VT;
1541   // Consult target whether it is a good idea to promote this operation and
1542   // what's the right type to promote it to.
1543   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1544     assert(PVT != VT && "Don't know what type to promote to!");
1545     // fold (aext (aext x)) -> (aext x)
1546     // fold (aext (zext x)) -> (zext x)
1547     // fold (aext (sext x)) -> (sext x)
1548     LLVM_DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG));
1549     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
1550   }
1551   return SDValue();
1552 }
1553
1554 bool DAGCombiner::PromoteLoad(SDValue Op) {
1555   if (!LegalOperations)
1556     return false;
1557
1558   if (!ISD::isUNINDEXEDLoad(Op.getNode()))
1559     return false;
1560
1561   EVT VT = Op.getValueType();
1562   if (VT.isVector() || !VT.isInteger())
1563     return false;
1564
1565   // If operation type is 'undesirable', e.g. i16 on x86, consider
1566   // promoting it.
1567   unsigned Opc = Op.getOpcode();
1568   if (TLI.isTypeDesirableForOp(Opc, VT))
1569     return false;
1570
1571   EVT PVT = VT;
1572   // Consult target whether it is a good idea to promote this operation and
1573   // what's the right type to promote it to.
1574   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
1575     assert(PVT != VT && "Don't know what type to promote to!");
1576
1577     SDLoc DL(Op);
1578     SDNode *N = Op.getNode();
1579     LoadSDNode *LD = cast<LoadSDNode>(N);
1580     EVT MemVT = LD->getMemoryVT();
1581     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? ISD::EXTLOAD
1582                                                       : LD->getExtensionType();
1583     SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
1584                                    LD->getChain(), LD->getBasePtr(),
1585                                    MemVT, LD->getMemOperand());
1586     SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
1587
1588     LLVM_DEBUG(dbgs() << "\nPromoting "; N->dump(&DAG); dbgs() << "\nTo: ";
1589                Result.getNode()->dump(&DAG); dbgs() << '\n');
1590     WorklistRemover DeadNodes(*this);
1591     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
1592     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
1593     deleteAndRecombine(N);
1594     AddToWorklist(Result.getNode());
1595     return true;
1596   }
1597   return false;
1598 }
1599
1600 /// Recursively delete a node which has no uses and any operands for
1601 /// which it is the only use.
1602 ///
1603 /// Note that this both deletes the nodes and removes them from the worklist.
1604 /// It also adds any nodes who have had a user deleted to the worklist as they
1605 /// may now have only one use and subject to other combines.
1606 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
1607   if (!N->use_empty())
1608     return false;
1609
1610   SmallSetVector<SDNode *, 16> Nodes;
1611   Nodes.insert(N);
1612   do {
1613     N = Nodes.pop_back_val();
1614     if (!N)
1615       continue;
1616
1617     if (N->use_empty()) {
1618       for (const SDValue &ChildN : N->op_values())
1619         Nodes.insert(ChildN.getNode());
1620
1621       removeFromWorklist(N);
1622       DAG.DeleteNode(N);
1623     } else {
1624       AddToWorklist(N);
1625     }
1626   } while (!Nodes.empty());
1627   return true;
1628 }
1629
1630 //===----------------------------------------------------------------------===//
1631 //  Main DAG Combiner implementation
1632 //===----------------------------------------------------------------------===//
1633
1634 void DAGCombiner::Run(CombineLevel AtLevel) {
1635   // set the instance variables, so that the various visit routines may use it.
1636   Level = AtLevel;
1637   LegalOperations = Level >= AfterLegalizeVectorOps;
1638   LegalTypes = Level >= AfterLegalizeTypes;
1639
1640   WorklistInserter AddNodes(*this);
1641
1642   // Add all the dag nodes to the worklist.
1643   for (SDNode &Node : DAG.allnodes())
1644     AddToWorklist(&Node);
1645
1646   // Create a dummy node (which is not added to allnodes), that adds a reference
1647   // to the root node, preventing it from being deleted, and tracking any
1648   // changes of the root.
1649   HandleSDNode Dummy(DAG.getRoot());
1650
1651   // While we have a valid worklist entry node, try to combine it.
1652   while (SDNode *N = getNextWorklistEntry()) {
1653     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
1654     // N is deleted from the DAG, since they too may now be dead or may have a
1655     // reduced number of uses, allowing other xforms.
1656     if (recursivelyDeleteUnusedNodes(N))
1657       continue;
1658
1659     WorklistRemover DeadNodes(*this);
1660
1661     // If this combine is running after legalizing the DAG, re-legalize any
1662     // nodes pulled off the worklist.
1663     if (Level == AfterLegalizeDAG) {
1664       SmallSetVector<SDNode *, 16> UpdatedNodes;
1665       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
1666
1667       for (SDNode *LN : UpdatedNodes) {
1668         AddUsersToWorklist(LN);
1669         AddToWorklist(LN);
1670       }
1671       if (!NIsValid)
1672         continue;
1673     }
1674
1675     LLVM_DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
1676
1677     // Add any operands of the new node which have not yet been combined to the
1678     // worklist as well. Because the worklist uniques things already, this
1679     // won't repeatedly process the same operand.
1680     CombinedNodes.insert(N);
1681     for (const SDValue &ChildN : N->op_values())
1682       if (!CombinedNodes.count(ChildN.getNode()))
1683         AddToWorklist(ChildN.getNode());
1684
1685     SDValue RV = combine(N);
1686
1687     if (!RV.getNode())
1688       continue;
1689
1690     ++NodesCombined;
1691
1692     // If we get back the same node we passed in, rather than a new node or
1693     // zero, we know that the node must have defined multiple values and
1694     // CombineTo was used.  Since CombineTo takes care of the worklist
1695     // mechanics for us, we have no work to do in this case.
1696     if (RV.getNode() == N)
1697       continue;
1698
1699     assert(N->getOpcode() != ISD::DELETED_NODE &&
1700            RV.getOpcode() != ISD::DELETED_NODE &&
1701            "Node was deleted but visit returned new node!");
1702
1703     LLVM_DEBUG(dbgs() << " ... into: "; RV.getNode()->dump(&DAG));
1704
1705     if (N->getNumValues() == RV.getNode()->getNumValues())
1706       DAG.ReplaceAllUsesWith(N, RV.getNode());
1707     else {
1708       assert(N->getValueType(0) == RV.getValueType() &&
1709              N->getNumValues() == 1 && "Type mismatch");
1710       DAG.ReplaceAllUsesWith(N, &RV);
1711     }
1712
1713     // Push the new node and any users onto the worklist
1714     AddToWorklist(RV.getNode());
1715     AddUsersToWorklist(RV.getNode());
1716
1717     // Finally, if the node is now dead, remove it from the graph.  The node
1718     // may not be dead if the replacement process recursively simplified to
1719     // something else needing this node. This will also take care of adding any
1720     // operands which have lost a user to the worklist.
1721     recursivelyDeleteUnusedNodes(N);
1722   }
1723
1724   // If the root changed (e.g. it was a dead load, update the root).
1725   DAG.setRoot(Dummy.getValue());
1726   DAG.RemoveDeadNodes();
1727 }
1728
1729 SDValue DAGCombiner::visit(SDNode *N) {
1730   switch (N->getOpcode()) {
1731   default: break;
1732   case ISD::TokenFactor:        return visitTokenFactor(N);
1733   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
1734   case ISD::ADD:                return visitADD(N);
1735   case ISD::SUB:                return visitSUB(N);
1736   case ISD::SADDSAT:
1737   case ISD::UADDSAT:            return visitADDSAT(N);
1738   case ISD::SSUBSAT:
1739   case ISD::USUBSAT:            return visitSUBSAT(N);
1740   case ISD::ADDC:               return visitADDC(N);
1741   case ISD::SADDO:
1742   case ISD::UADDO:              return visitADDO(N);
1743   case ISD::SUBC:               return visitSUBC(N);
1744   case ISD::SSUBO:
1745   case ISD::USUBO:              return visitSUBO(N);
1746   case ISD::ADDE:               return visitADDE(N);
1747   case ISD::ADDCARRY:           return visitADDCARRY(N);
1748   case ISD::SUBE:               return visitSUBE(N);
1749   case ISD::SUBCARRY:           return visitSUBCARRY(N);
1750   case ISD::SMULFIX:
1751   case ISD::SMULFIXSAT:
1752   case ISD::UMULFIX:            return visitMULFIX(N);
1753   case ISD::MUL:                return visitMUL(N);
1754   case ISD::SDIV:               return visitSDIV(N);
1755   case ISD::UDIV:               return visitUDIV(N);
1756   case ISD::SREM:
1757   case ISD::UREM:               return visitREM(N);
1758   case ISD::MULHU:              return visitMULHU(N);
1759   case ISD::MULHS:              return visitMULHS(N);
1760   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
1761   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
1762   case ISD::SMULO:
1763   case ISD::UMULO:              return visitMULO(N);
1764   case ISD::SMIN:
1765   case ISD::SMAX:
1766   case ISD::UMIN:
1767   case ISD::UMAX:               return visitIMINMAX(N);
1768   case ISD::AND:                return visitAND(N);
1769   case ISD::OR:                 return visitOR(N);
1770   case ISD::XOR:                return visitXOR(N);
1771   case ISD::SHL:                return visitSHL(N);
1772   case ISD::SRA:                return visitSRA(N);
1773   case ISD::SRL:                return visitSRL(N);
1774   case ISD::ROTR:
1775   case ISD::ROTL:               return visitRotate(N);
1776   case ISD::FSHL:
1777   case ISD::FSHR:               return visitFunnelShift(N);
1778   case ISD::ABS:                return visitABS(N);
1779   case ISD::BSWAP:              return visitBSWAP(N);
1780   case ISD::BITREVERSE:         return visitBITREVERSE(N);
1781   case ISD::CTLZ:               return visitCTLZ(N);
1782   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
1783   case ISD::CTTZ:               return visitCTTZ(N);
1784   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
1785   case ISD::CTPOP:              return visitCTPOP(N);
1786   case ISD::SELECT:             return visitSELECT(N);
1787   case ISD::VSELECT:            return visitVSELECT(N);
1788   case ISD::SELECT_CC:          return visitSELECT_CC(N);
1789   case ISD::SETCC:              return visitSETCC(N);
1790   case ISD::SETCCCARRY:         return visitSETCCCARRY(N);
1791   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
1792   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
1793   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
1794   case ISD::AssertSext:
1795   case ISD::AssertZext:         return visitAssertExt(N);
1796   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
1797   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
1798   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
1799   case ISD::TRUNCATE:           return visitTRUNCATE(N);
1800   case ISD::BITCAST:            return visitBITCAST(N);
1801   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
1802   case ISD::FADD:               return visitFADD(N);
1803   case ISD::FSUB:               return visitFSUB(N);
1804   case ISD::FMUL:               return visitFMUL(N);
1805   case ISD::FMA:                return visitFMA(N);
1806   case ISD::FDIV:               return visitFDIV(N);
1807   case ISD::FREM:               return visitFREM(N);
1808   case ISD::FSQRT:              return visitFSQRT(N);
1809   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
1810   case ISD::FPOW:               return visitFPOW(N);
1811   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
1812   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
1813   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
1814   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
1815   case ISD::FP_ROUND:           return visitFP_ROUND(N);
1816   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
1817   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
1818   case ISD::FNEG:               return visitFNEG(N);
1819   case ISD::FABS:               return visitFABS(N);
1820   case ISD::FFLOOR:             return visitFFLOOR(N);
1821   case ISD::FMINNUM:            return visitFMINNUM(N);
1822   case ISD::FMAXNUM:            return visitFMAXNUM(N);
1823   case ISD::FMINIMUM:           return visitFMINIMUM(N);
1824   case ISD::FMAXIMUM:           return visitFMAXIMUM(N);
1825   case ISD::FCEIL:              return visitFCEIL(N);
1826   case ISD::FTRUNC:             return visitFTRUNC(N);
1827   case ISD::BRCOND:             return visitBRCOND(N);
1828   case ISD::BR_CC:              return visitBR_CC(N);
1829   case ISD::LOAD:               return visitLOAD(N);
1830   case ISD::STORE:              return visitSTORE(N);
1831   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
1832   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
1833   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
1834   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
1835   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
1836   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
1837   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
1838   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
1839   case ISD::MGATHER:            return visitMGATHER(N);
1840   case ISD::MLOAD:              return visitMLOAD(N);
1841   case ISD::MSCATTER:           return visitMSCATTER(N);
1842   case ISD::MSTORE:             return visitMSTORE(N);
1843   case ISD::LIFETIME_END:       return visitLIFETIME_END(N);
1844   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
1845   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
1846   case ISD::VECREDUCE_FADD:
1847   case ISD::VECREDUCE_FMUL:
1848   case ISD::VECREDUCE_ADD:
1849   case ISD::VECREDUCE_MUL:
1850   case ISD::VECREDUCE_AND:
1851   case ISD::VECREDUCE_OR:
1852   case ISD::VECREDUCE_XOR:
1853   case ISD::VECREDUCE_SMAX:
1854   case ISD::VECREDUCE_SMIN:
1855   case ISD::VECREDUCE_UMAX:
1856   case ISD::VECREDUCE_UMIN:
1857   case ISD::VECREDUCE_FMAX:
1858   case ISD::VECREDUCE_FMIN:     return visitVECREDUCE(N);
1859   }
1860   return SDValue();
1861 }
1862
1863 SDValue DAGCombiner::combine(SDNode *N) {
1864   SDValue RV = visit(N);
1865
1866   // If nothing happened, try a target-specific DAG combine.
1867   if (!RV.getNode()) {
1868     assert(N->getOpcode() != ISD::DELETED_NODE &&
1869            "Node was deleted but visit returned NULL!");
1870
1871     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1872         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1873
1874       // Expose the DAG combiner to the target combiner impls.
1875       TargetLowering::DAGCombinerInfo
1876         DagCombineInfo(DAG, Level, false, this);
1877
1878       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1879     }
1880   }
1881
1882   // If nothing happened still, try promoting the operation.
1883   if (!RV.getNode()) {
1884     switch (N->getOpcode()) {
1885     default: break;
1886     case ISD::ADD:
1887     case ISD::SUB:
1888     case ISD::MUL:
1889     case ISD::AND:
1890     case ISD::OR:
1891     case ISD::XOR:
1892       RV = PromoteIntBinOp(SDValue(N, 0));
1893       break;
1894     case ISD::SHL:
1895     case ISD::SRA:
1896     case ISD::SRL:
1897       RV = PromoteIntShiftOp(SDValue(N, 0));
1898       break;
1899     case ISD::SIGN_EXTEND:
1900     case ISD::ZERO_EXTEND:
1901     case ISD::ANY_EXTEND:
1902       RV = PromoteExtend(SDValue(N, 0));
1903       break;
1904     case ISD::LOAD:
1905       if (PromoteLoad(SDValue(N, 0)))
1906         RV = SDValue(N, 0);
1907       break;
1908     }
1909   }
1910
1911   // If N is a commutative binary node, try to eliminate it if the commuted
1912   // version is already present in the DAG.
1913   if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
1914       N->getNumValues() == 1) {
1915     SDValue N0 = N->getOperand(0);
1916     SDValue N1 = N->getOperand(1);
1917
1918     // Constant operands are canonicalized to RHS.
1919     if (N0 != N1 && (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1))) {
1920       SDValue Ops[] = {N1, N0};
1921       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
1922                                             N->getFlags());
1923       if (CSENode)
1924         return SDValue(CSENode, 0);
1925     }
1926   }
1927
1928   return RV;
1929 }
1930
1931 /// Given a node, return its input chain if it has one, otherwise return a null
1932 /// sd operand.
1933 static SDValue getInputChainForNode(SDNode *N) {
1934   if (unsigned NumOps = N->getNumOperands()) {
1935     if (N->getOperand(0).getValueType() == MVT::Other)
1936       return N->getOperand(0);
1937     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1938       return N->getOperand(NumOps-1);
1939     for (unsigned i = 1; i < NumOps-1; ++i)
1940       if (N->getOperand(i).getValueType() == MVT::Other)
1941         return N->getOperand(i);
1942   }
1943   return SDValue();
1944 }
1945
1946 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1947   // If N has two operands, where one has an input chain equal to the other,
1948   // the 'other' chain is redundant.
1949   if (N->getNumOperands() == 2) {
1950     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1951       return N->getOperand(0);
1952     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1953       return N->getOperand(1);
1954   }
1955
1956   // Don't simplify token factors if optnone.
1957   if (OptLevel == CodeGenOpt::None)
1958     return SDValue();
1959
1960   // If the sole user is a token factor, we should make sure we have a
1961   // chance to merge them together. This prevents TF chains from inhibiting
1962   // optimizations.
1963   if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
1964     AddToWorklist(*(N->use_begin()));
1965
1966   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1967   SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
1968   SmallPtrSet<SDNode*, 16> SeenOps;
1969   bool Changed = false;             // If we should replace this token factor.
1970
1971   // Start out with this token factor.
1972   TFs.push_back(N);
1973
1974   // Iterate through token factors.  The TFs grows when new token factors are
1975   // encountered.
1976   for (unsigned i = 0; i < TFs.size(); ++i) {
1977     // Limit number of nodes to inline, to avoid quadratic compile times.
1978     // We have to add the outstanding Token Factors to Ops, otherwise we might
1979     // drop Ops from the resulting Token Factors.
1980     if (Ops.size() > TokenFactorInlineLimit) {
1981       for (unsigned j = i; j < TFs.size(); j++)
1982         Ops.emplace_back(TFs[j], 0);
1983       // Drop unprocessed Token Factors from TFs, so we do not add them to the
1984       // combiner worklist later.
1985       TFs.resize(i);
1986       break;
1987     }
1988
1989     SDNode *TF = TFs[i];
1990     // Check each of the operands.
1991     for (const SDValue &Op : TF->op_values()) {
1992       switch (Op.getOpcode()) {
1993       case ISD::EntryToken:
1994         // Entry tokens don't need to be added to the list. They are
1995         // redundant.
1996         Changed = true;
1997         break;
1998
1999       case ISD::TokenFactor:
2000         if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
2001           // Queue up for processing.
2002           TFs.push_back(Op.getNode());
2003           Changed = true;
2004           break;
2005         }
2006         LLVM_FALLTHROUGH;
2007
2008       default:
2009         // Only add if it isn't already in the list.
2010         if (SeenOps.insert(Op.getNode()).second)
2011           Ops.push_back(Op);
2012         else
2013           Changed = true;
2014         break;
2015       }
2016     }
2017   }
2018
2019   // Re-visit inlined Token Factors, to clean them up in case they have been
2020   // removed. Skip the first Token Factor, as this is the current node.
2021   for (unsigned i = 1, e = TFs.size(); i < e; i++)
2022     AddToWorklist(TFs[i]);
2023
2024   // Remove Nodes that are chained to another node in the list. Do so
2025   // by walking up chains breath-first stopping when we've seen
2026   // another operand. In general we must climb to the EntryNode, but we can exit
2027   // early if we find all remaining work is associated with just one operand as
2028   // no further pruning is possible.
2029
2030   // List of nodes to search through and original Ops from which they originate.
2031   SmallVector<std::pair<SDNode *, unsigned>, 8> Worklist;
2032   SmallVector<unsigned, 8> OpWorkCount; // Count of work for each Op.
2033   SmallPtrSet<SDNode *, 16> SeenChains;
2034   bool DidPruneOps = false;
2035
2036   unsigned NumLeftToConsider = 0;
2037   for (const SDValue &Op : Ops) {
2038     Worklist.push_back(std::make_pair(Op.getNode(), NumLeftToConsider++));
2039     OpWorkCount.push_back(1);
2040   }
2041
2042   auto AddToWorklist = [&](unsigned CurIdx, SDNode *Op, unsigned OpNumber) {
2043     // If this is an Op, we can remove the op from the list. Remark any
2044     // search associated with it as from the current OpNumber.
2045     if (SeenOps.count(Op) != 0) {
2046       Changed = true;
2047       DidPruneOps = true;
2048       unsigned OrigOpNumber = 0;
2049       while (OrigOpNumber < Ops.size() && Ops[OrigOpNumber].getNode() != Op)
2050         OrigOpNumber++;
2051       assert((OrigOpNumber != Ops.size()) &&
2052              "expected to find TokenFactor Operand");
2053       // Re-mark worklist from OrigOpNumber to OpNumber
2054       for (unsigned i = CurIdx + 1; i < Worklist.size(); ++i) {
2055         if (Worklist[i].second == OrigOpNumber) {
2056           Worklist[i].second = OpNumber;
2057         }
2058       }
2059       OpWorkCount[OpNumber] += OpWorkCount[OrigOpNumber];
2060       OpWorkCount[OrigOpNumber] = 0;
2061       NumLeftToConsider--;
2062     }
2063     // Add if it's a new chain
2064     if (SeenChains.insert(Op).second) {
2065       OpWorkCount[OpNumber]++;
2066       Worklist.push_back(std::make_pair(Op, OpNumber));
2067     }
2068   };
2069
2070   for (unsigned i = 0; i < Worklist.size() && i < 1024; ++i) {
2071     // We need at least be consider at least 2 Ops to prune.
2072     if (NumLeftToConsider <= 1)
2073       break;
2074     auto CurNode = Worklist[i].first;
2075     auto CurOpNumber = Worklist[i].second;
2076     assert((OpWorkCount[CurOpNumber] > 0) &&
2077            "Node should not appear in worklist");
2078     switch (CurNode->getOpcode()) {
2079     case ISD::EntryToken:
2080       // Hitting EntryToken is the only way for the search to terminate without
2081       // hitting
2082       // another operand's search. Prevent us from marking this operand
2083       // considered.
2084       NumLeftToConsider++;
2085       break;
2086     case ISD::TokenFactor:
2087       for (const SDValue &Op : CurNode->op_values())
2088         AddToWorklist(i, Op.getNode(), CurOpNumber);
2089       break;
2090     case ISD::LIFETIME_START:
2091     case ISD::LIFETIME_END:
2092     case ISD::CopyFromReg:
2093     case ISD::CopyToReg:
2094       AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
2095       break;
2096     default:
2097       if (auto *MemNode = dyn_cast<MemSDNode>(CurNode))
2098         AddToWorklist(i, MemNode->getChain().getNode(), CurOpNumber);
2099       break;
2100     }
2101     OpWorkCount[CurOpNumber]--;
2102     if (OpWorkCount[CurOpNumber] == 0)
2103       NumLeftToConsider--;
2104   }
2105
2106   // If we've changed things around then replace token factor.
2107   if (Changed) {
2108     SDValue Result;
2109     if (Ops.empty()) {
2110       // The entry token is the only possible outcome.
2111       Result = DAG.getEntryNode();
2112     } else {
2113       if (DidPruneOps) {
2114         SmallVector<SDValue, 8> PrunedOps;
2115         //
2116         for (const SDValue &Op : Ops) {
2117           if (SeenChains.count(Op.getNode()) == 0)
2118             PrunedOps.push_back(Op);
2119         }
2120         Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
2121       } else {
2122         Result = DAG.getTokenFactor(SDLoc(N), Ops);
2123       }
2124     }
2125     return Result;
2126   }
2127   return SDValue();
2128 }
2129
2130 /// MERGE_VALUES can always be eliminated.
2131 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
2132   WorklistRemover DeadNodes(*this);
2133   // Replacing results may cause a different MERGE_VALUES to suddenly
2134   // be CSE'd with N, and carry its uses with it. Iterate until no
2135   // uses remain, to ensure that the node can be safely deleted.
2136   // First add the users of this node to the work list so that they
2137   // can be tried again once they have new operands.
2138   AddUsersToWorklist(N);
2139   do {
2140     // Do as a single replacement to avoid rewalking use lists.
2141     SmallVector<SDValue, 8> Ops;
2142     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
2143       Ops.push_back(N->getOperand(i));
2144     DAG.ReplaceAllUsesWith(N, Ops.data());
2145   } while (!N->use_empty());
2146   deleteAndRecombine(N);
2147   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2148 }
2149
2150 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
2151 /// ConstantSDNode pointer else nullptr.
2152 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
2153   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
2154   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
2155 }
2156
2157 SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
2158   assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
2159          "Unexpected binary operator");
2160
2161   // Don't do this unless the old select is going away. We want to eliminate the
2162   // binary operator, not replace a binop with a select.
2163   // TODO: Handle ISD::SELECT_CC.
2164   unsigned SelOpNo = 0;
2165   SDValue Sel = BO->getOperand(0);
2166   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse()) {
2167     SelOpNo = 1;
2168     Sel = BO->getOperand(1);
2169   }
2170
2171   if (Sel.getOpcode() != ISD::SELECT || !Sel.hasOneUse())
2172     return SDValue();
2173
2174   SDValue CT = Sel.getOperand(1);
2175   if (!isConstantOrConstantVector(CT, true) &&
2176       !isConstantFPBuildVectorOrConstantFP(CT))
2177     return SDValue();
2178
2179   SDValue CF = Sel.getOperand(2);
2180   if (!isConstantOrConstantVector(CF, true) &&
2181       !isConstantFPBuildVectorOrConstantFP(CF))
2182     return SDValue();
2183
2184   // Bail out if any constants are opaque because we can't constant fold those.
2185   // The exception is "and" and "or" with either 0 or -1 in which case we can
2186   // propagate non constant operands into select. I.e.:
2187   // and (select Cond, 0, -1), X --> select Cond, 0, X
2188   // or X, (select Cond, -1, 0) --> select Cond, -1, X
2189   auto BinOpcode = BO->getOpcode();
2190   bool CanFoldNonConst =
2191       (BinOpcode == ISD::AND || BinOpcode == ISD::OR) &&
2192       (isNullOrNullSplat(CT) || isAllOnesOrAllOnesSplat(CT)) &&
2193       (isNullOrNullSplat(CF) || isAllOnesOrAllOnesSplat(CF));
2194
2195   SDValue CBO = BO->getOperand(SelOpNo ^ 1);
2196   if (!CanFoldNonConst &&
2197       !isConstantOrConstantVector(CBO, true) &&
2198       !isConstantFPBuildVectorOrConstantFP(CBO))
2199     return SDValue();
2200
2201   EVT VT = Sel.getValueType();
2202
2203   // In case of shift value and shift amount may have different VT. For instance
2204   // on x86 shift amount is i8 regardles of LHS type. Bail out if we have
2205   // swapped operands and value types do not match. NB: x86 is fine if operands
2206   // are not swapped with shift amount VT being not bigger than shifted value.
2207   // TODO: that is possible to check for a shift operation, correct VTs and
2208   // still perform optimization on x86 if needed.
2209   if (SelOpNo && VT != CBO.getValueType())
2210     return SDValue();
2211
2212   // We have a select-of-constants followed by a binary operator with a
2213   // constant. Eliminate the binop by pulling the constant math into the select.
2214   // Example: add (select Cond, CT, CF), CBO --> select Cond, CT + CBO, CF + CBO
2215   SDLoc DL(Sel);
2216   SDValue NewCT = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CT)
2217                           : DAG.getNode(BinOpcode, DL, VT, CT, CBO);
2218   if (!CanFoldNonConst && !NewCT.isUndef() &&
2219       !isConstantOrConstantVector(NewCT, true) &&
2220       !isConstantFPBuildVectorOrConstantFP(NewCT))
2221     return SDValue();
2222
2223   SDValue NewCF = SelOpNo ? DAG.getNode(BinOpcode, DL, VT, CBO, CF)
2224                           : DAG.getNode(BinOpcode, DL, VT, CF, CBO);
2225   if (!CanFoldNonConst && !NewCF.isUndef() &&
2226       !isConstantOrConstantVector(NewCF, true) &&
2227       !isConstantFPBuildVectorOrConstantFP(NewCF))
2228     return SDValue();
2229
2230   SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
2231   SelectOp->setFlags(BO->getFlags());
2232   return SelectOp;
2233 }
2234
2235 static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
2236   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2237          "Expecting add or sub");
2238
2239   // Match a constant operand and a zext operand for the math instruction:
2240   // add Z, C
2241   // sub C, Z
2242   bool IsAdd = N->getOpcode() == ISD::ADD;
2243   SDValue C = IsAdd ? N->getOperand(1) : N->getOperand(0);
2244   SDValue Z = IsAdd ? N->getOperand(0) : N->getOperand(1);
2245   auto *CN = dyn_cast<ConstantSDNode>(C);
2246   if (!CN || Z.getOpcode() != ISD::ZERO_EXTEND)
2247     return SDValue();
2248
2249   // Match the zext operand as a setcc of a boolean.
2250   if (Z.getOperand(0).getOpcode() != ISD::SETCC ||
2251       Z.getOperand(0).getValueType() != MVT::i1)
2252     return SDValue();
2253
2254   // Match the compare as: setcc (X & 1), 0, eq.
2255   SDValue SetCC = Z.getOperand(0);
2256   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
2257   if (CC != ISD::SETEQ || !isNullConstant(SetCC.getOperand(1)) ||
2258       SetCC.getOperand(0).getOpcode() != ISD::AND ||
2259       !isOneConstant(SetCC.getOperand(0).getOperand(1)))
2260     return SDValue();
2261
2262   // We are adding/subtracting a constant and an inverted low bit. Turn that
2263   // into a subtract/add of the low bit with incremented/decremented constant:
2264   // add (zext i1 (seteq (X & 1), 0)), C --> sub C+1, (zext (X & 1))
2265   // sub C, (zext i1 (seteq (X & 1), 0)) --> add C-1, (zext (X & 1))
2266   EVT VT = C.getValueType();
2267   SDLoc DL(N);
2268   SDValue LowBit = DAG.getZExtOrTrunc(SetCC.getOperand(0), DL, VT);
2269   SDValue C1 = IsAdd ? DAG.getConstant(CN->getAPIntValue() + 1, DL, VT) :
2270                        DAG.getConstant(CN->getAPIntValue() - 1, DL, VT);
2271   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, C1, LowBit);
2272 }
2273
2274 /// Try to fold a 'not' shifted sign-bit with add/sub with constant operand into
2275 /// a shift and add with a different constant.
2276 static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
2277   assert((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) &&
2278          "Expecting add or sub");
2279
2280   // We need a constant operand for the add/sub, and the other operand is a
2281   // logical shift right: add (srl), C or sub C, (srl).
2282   // TODO - support non-uniform vector amounts.
2283   bool IsAdd = N->getOpcode() == ISD::ADD;
2284   SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
2285   SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
2286   ConstantSDNode *C = isConstOrConstSplat(ConstantOp);
2287   if (!C || ShiftOp.getOpcode() != ISD::SRL)
2288     return SDValue();
2289
2290   // The shift must be of a 'not' value.
2291   SDValue Not = ShiftOp.getOperand(0);
2292   if (!Not.hasOneUse() || !isBitwiseNot(Not))
2293     return SDValue();
2294
2295   // The shift must be moving the sign bit to the least-significant-bit.
2296   EVT VT = ShiftOp.getValueType();
2297   SDValue ShAmt = ShiftOp.getOperand(1);
2298   ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
2299   if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
2300     return SDValue();
2301
2302   // Eliminate the 'not' by adjusting the shift and add/sub constant:
2303   // add (srl (not X), 31), C --> add (sra X, 31), (C + 1)
2304   // sub C, (srl (not X), 31) --> add (srl X, 31), (C - 1)
2305   SDLoc DL(N);
2306   auto ShOpcode = IsAdd ? ISD::SRA : ISD::SRL;
2307   SDValue NewShift = DAG.getNode(ShOpcode, DL, VT, Not.getOperand(0), ShAmt);
2308   APInt NewC = IsAdd ? C->getAPIntValue() + 1 : C->getAPIntValue() - 1;
2309   return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
2310 }
2311
2312 /// Try to fold a node that behaves like an ADD (note that N isn't necessarily
2313 /// an ISD::ADD here, it could for example be an ISD::OR if we know that there
2314 /// are no common bits set in the operands).
2315 SDValue DAGCombiner::visitADDLike(SDNode *N) {
2316   SDValue N0 = N->getOperand(0);
2317   SDValue N1 = N->getOperand(1);
2318   EVT VT = N0.getValueType();
2319   SDLoc DL(N);
2320
2321   // fold vector ops
2322   if (VT.isVector()) {
2323     if (SDValue FoldedVOp = SimplifyVBinOp(N))
2324       return FoldedVOp;
2325
2326     // fold (add x, 0) -> x, vector edition
2327     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2328       return N0;
2329     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2330       return N1;
2331   }
2332
2333   // fold (add x, undef) -> undef
2334   if (N0.isUndef())
2335     return N0;
2336
2337   if (N1.isUndef())
2338     return N1;
2339
2340   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2341     // canonicalize constant to RHS
2342     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2343       return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
2344     // fold (add c1, c2) -> c1+c2
2345     return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
2346                                       N1.getNode());
2347   }
2348
2349   // fold (add x, 0) -> x
2350   if (isNullConstant(N1))
2351     return N0;
2352
2353   if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
2354     // fold ((A-c1)+c2) -> (A+(c2-c1))
2355     if (N0.getOpcode() == ISD::SUB &&
2356         isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
2357       SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
2358                                                N0.getOperand(1).getNode());
2359       assert(Sub && "Constant folding failed");
2360       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
2361     }
2362
2363     // fold ((c1-A)+c2) -> (c1+c2)-A
2364     if (N0.getOpcode() == ISD::SUB &&
2365         isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
2366       SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
2367                                                N0.getOperand(0).getNode());
2368       assert(Add && "Constant folding failed");
2369       return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2370     }
2371
2372     // add (sext i1 X), 1 -> zext (not i1 X)
2373     // We don't transform this pattern:
2374     //   add (zext i1 X), -1 -> sext (not i1 X)
2375     // because most (?) targets generate better code for the zext form.
2376     if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.hasOneUse() &&
2377         isOneOrOneSplat(N1)) {
2378       SDValue X = N0.getOperand(0);
2379       if ((!LegalOperations ||
2380            (TLI.isOperationLegal(ISD::XOR, X.getValueType()) &&
2381             TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) &&
2382           X.getScalarValueSizeInBits() == 1) {
2383         SDValue Not = DAG.getNOT(DL, X, X.getValueType());
2384         return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Not);
2385       }
2386     }
2387
2388     // Undo the add -> or combine to merge constant offsets from a frame index.
2389     if (N0.getOpcode() == ISD::OR &&
2390         isa<FrameIndexSDNode>(N0.getOperand(0)) &&
2391         isa<ConstantSDNode>(N0.getOperand(1)) &&
2392         DAG.haveNoCommonBitsSet(N0.getOperand(0), N0.getOperand(1))) {
2393       SDValue Add0 = DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(1));
2394       return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Add0);
2395     }
2396   }
2397
2398   if (SDValue NewSel = foldBinOpIntoSelect(N))
2399     return NewSel;
2400
2401   // reassociate add
2402   if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
2403     if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
2404       return RADD;
2405   }
2406   // fold ((0-A) + B) -> B-A
2407   if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
2408     return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2409
2410   // fold (A + (0-B)) -> A-B
2411   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
2412     return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
2413
2414   // fold (A+(B-A)) -> B
2415   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
2416     return N1.getOperand(0);
2417
2418   // fold ((B-A)+A) -> B
2419   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
2420     return N0.getOperand(0);
2421
2422   // fold ((A-B)+(C-A)) -> (C-B)
2423   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2424       N0.getOperand(0) == N1.getOperand(1))
2425     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2426                        N0.getOperand(1));
2427
2428   // fold ((A-B)+(B-C)) -> (A-C)
2429   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
2430       N0.getOperand(1) == N1.getOperand(0))
2431     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
2432                        N1.getOperand(1));
2433
2434   // fold (A+(B-(A+C))) to (B-C)
2435   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2436       N0 == N1.getOperand(1).getOperand(0))
2437     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2438                        N1.getOperand(1).getOperand(1));
2439
2440   // fold (A+(B-(C+A))) to (B-C)
2441   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
2442       N0 == N1.getOperand(1).getOperand(1))
2443     return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
2444                        N1.getOperand(1).getOperand(0));
2445
2446   // fold (A+((B-A)+or-C)) to (B+or-C)
2447   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
2448       N1.getOperand(0).getOpcode() == ISD::SUB &&
2449       N0 == N1.getOperand(0).getOperand(1))
2450     return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
2451                        N1.getOperand(1));
2452
2453   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
2454   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
2455     SDValue N00 = N0.getOperand(0);
2456     SDValue N01 = N0.getOperand(1);
2457     SDValue N10 = N1.getOperand(0);
2458     SDValue N11 = N1.getOperand(1);
2459
2460     if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
2461       return DAG.getNode(ISD::SUB, DL, VT,
2462                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
2463                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
2464   }
2465
2466   // fold (add (umax X, C), -C) --> (usubsat X, C)
2467   if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
2468     auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
2469       return (!Max && !Op) ||
2470              (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
2471     };
2472     if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
2473                                   /*AllowUndefs*/ true))
2474       return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
2475                          N0.getOperand(1));
2476   }
2477
2478   if (SimplifyDemandedBits(SDValue(N, 0)))
2479     return SDValue(N, 0);
2480
2481   if (isOneOrOneSplat(N1)) {
2482     // fold (add (xor a, -1), 1) -> (sub 0, a)
2483     if (isBitwiseNot(N0))
2484       return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
2485                          N0.getOperand(0));
2486
2487     // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
2488     if (N0.getOpcode() == ISD::ADD ||
2489         N0.getOpcode() == ISD::UADDO ||
2490         N0.getOpcode() == ISD::SADDO) {
2491       SDValue A, Xor;
2492
2493       if (isBitwiseNot(N0.getOperand(0))) {
2494         A = N0.getOperand(1);
2495         Xor = N0.getOperand(0);
2496       } else if (isBitwiseNot(N0.getOperand(1))) {
2497         A = N0.getOperand(0);
2498         Xor = N0.getOperand(1);
2499       }
2500
2501       if (Xor)
2502         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
2503     }
2504
2505     // Look for:
2506     //   add (add x, y), 1
2507     // And if the target does not like this form then turn into:
2508     //   sub y, (xor x, -1)
2509     if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2510         N0.getOpcode() == ISD::ADD) {
2511       SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2512                                 DAG.getAllOnesConstant(DL, VT));
2513       return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
2514     }
2515   }
2516
2517   // (x - y) + -1  ->  add (xor y, -1), x
2518   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2519       isAllOnesOrAllOnesSplat(N1)) {
2520     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
2521     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
2522   }
2523
2524   if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
2525     return Combined;
2526
2527   if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
2528     return Combined;
2529
2530   return SDValue();
2531 }
2532
2533 SDValue DAGCombiner::visitADD(SDNode *N) {
2534   SDValue N0 = N->getOperand(0);
2535   SDValue N1 = N->getOperand(1);
2536   EVT VT = N0.getValueType();
2537   SDLoc DL(N);
2538
2539   if (SDValue Combined = visitADDLike(N))
2540     return Combined;
2541
2542   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
2543     return V;
2544
2545   if (SDValue V = foldAddSubOfSignBit(N, DAG))
2546     return V;
2547
2548   // fold (a+b) -> (a|b) iff a and b share no bits.
2549   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
2550       DAG.haveNoCommonBitsSet(N0, N1))
2551     return DAG.getNode(ISD::OR, DL, VT, N0, N1);
2552
2553   return SDValue();
2554 }
2555
2556 SDValue DAGCombiner::visitADDSAT(SDNode *N) {
2557   unsigned Opcode = N->getOpcode();
2558   SDValue N0 = N->getOperand(0);
2559   SDValue N1 = N->getOperand(1);
2560   EVT VT = N0.getValueType();
2561   SDLoc DL(N);
2562
2563   // fold vector ops
2564   if (VT.isVector()) {
2565     // TODO SimplifyVBinOp
2566
2567     // fold (add_sat x, 0) -> x, vector edition
2568     if (ISD::isBuildVectorAllZeros(N1.getNode()))
2569       return N0;
2570     if (ISD::isBuildVectorAllZeros(N0.getNode()))
2571       return N1;
2572   }
2573
2574   // fold (add_sat x, undef) -> -1
2575   if (N0.isUndef() || N1.isUndef())
2576     return DAG.getAllOnesConstant(DL, VT);
2577
2578   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
2579     // canonicalize constant to RHS
2580     if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
2581       return DAG.getNode(Opcode, DL, VT, N1, N0);
2582     // fold (add_sat c1, c2) -> c3
2583     return DAG.FoldConstantArithmetic(Opcode, DL, VT, N0.getNode(),
2584                                       N1.getNode());
2585   }
2586
2587   // fold (add_sat x, 0) -> x
2588   if (isNullConstant(N1))
2589     return N0;
2590
2591   // If it cannot overflow, transform into an add.
2592   if (Opcode == ISD::UADDSAT)
2593     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2594       return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
2595
2596   return SDValue();
2597 }
2598
2599 static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
2600   bool Masked = false;
2601
2602   // First, peel away TRUNCATE/ZERO_EXTEND/AND nodes due to legalization.
2603   while (true) {
2604     if (V.getOpcode() == ISD::TRUNCATE || V.getOpcode() == ISD::ZERO_EXTEND) {
2605       V = V.getOperand(0);
2606       continue;
2607     }
2608
2609     if (V.getOpcode() == ISD::AND && isOneConstant(V.getOperand(1))) {
2610       Masked = true;
2611       V = V.getOperand(0);
2612       continue;
2613     }
2614
2615     break;
2616   }
2617
2618   // If this is not a carry, return.
2619   if (V.getResNo() != 1)
2620     return SDValue();
2621
2622   if (V.getOpcode() != ISD::ADDCARRY && V.getOpcode() != ISD::SUBCARRY &&
2623       V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
2624     return SDValue();
2625
2626   EVT VT = V.getNode()->getValueType(0);
2627   if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
2628     return SDValue();
2629
2630   // If the result is masked, then no matter what kind of bool it is we can
2631   // return. If it isn't, then we need to make sure the bool type is either 0 or
2632   // 1 and not other values.
2633   if (Masked ||
2634       TLI.getBooleanContents(V.getValueType()) ==
2635           TargetLoweringBase::ZeroOrOneBooleanContent)
2636     return V;
2637
2638   return SDValue();
2639 }
2640
2641 /// Given the operands of an add/sub operation, see if the 2nd operand is a
2642 /// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
2643 /// the opcode and bypass the mask operation.
2644 static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
2645                                  SelectionDAG &DAG, const SDLoc &DL) {
2646   if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
2647     return SDValue();
2648
2649   EVT VT = N0.getValueType();
2650   if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
2651     return SDValue();
2652
2653   // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
2654   // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
2655   return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
2656 }
2657
2658 /// Helper for doing combines based on N0 and N1 being added to each other.
2659 SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
2660                                           SDNode *LocReference) {
2661   EVT VT = N0.getValueType();
2662   SDLoc DL(LocReference);
2663
2664   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
2665   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
2666       isNullOrNullSplat(N1.getOperand(0).getOperand(0)))
2667     return DAG.getNode(ISD::SUB, DL, VT, N0,
2668                        DAG.getNode(ISD::SHL, DL, VT,
2669                                    N1.getOperand(0).getOperand(1),
2670                                    N1.getOperand(1)));
2671
2672   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
2673     return V;
2674
2675   // Look for:
2676   //   add (add x, 1), y
2677   // And if the target does not like this form then turn into:
2678   //   sub y, (xor x, -1)
2679   if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
2680       N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
2681     SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
2682                               DAG.getAllOnesConstant(DL, VT));
2683     return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
2684   }
2685
2686   // Hoist one-use subtraction by non-opaque constant:
2687   //   (x - C) + y  ->  (x + y) - C
2688   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
2689   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2690       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
2691     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
2692     return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
2693   }
2694   // Hoist one-use subtraction from non-opaque constant:
2695   //   (C - x) + y  ->  (y - x) + C
2696   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
2697       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
2698     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
2699     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
2700   }
2701
2702   // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
2703   // rather than 'add 0/-1' (the zext should get folded).
2704   // add (sext i1 Y), X --> sub X, (zext i1 Y)
2705   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
2706       N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
2707       TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
2708     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
2709     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
2710   }
2711
2712   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
2713   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
2714     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
2715     if (TN->getVT() == MVT::i1) {
2716       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
2717                                  DAG.getConstant(1, DL, VT));
2718       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
2719     }
2720   }
2721
2722   // (add X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2723   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1)) &&
2724       N1.getResNo() == 0)
2725     return DAG.getNode(ISD::ADDCARRY, DL, N1->getVTList(),
2726                        N0, N1.getOperand(0), N1.getOperand(2));
2727
2728   // (add X, Carry) -> (addcarry X, 0, Carry)
2729   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2730     if (SDValue Carry = getAsCarry(TLI, N1))
2731       return DAG.getNode(ISD::ADDCARRY, DL,
2732                          DAG.getVTList(VT, Carry.getValueType()), N0,
2733                          DAG.getConstant(0, DL, VT), Carry);
2734
2735   return SDValue();
2736 }
2737
2738 SDValue DAGCombiner::visitADDC(SDNode *N) {
2739   SDValue N0 = N->getOperand(0);
2740   SDValue N1 = N->getOperand(1);
2741   EVT VT = N0.getValueType();
2742   SDLoc DL(N);
2743
2744   // If the flag result is dead, turn this into an ADD.
2745   if (!N->hasAnyUseOfValue(1))
2746     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2747                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2748
2749   // canonicalize constant to RHS.
2750   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2751   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2752   if (N0C && !N1C)
2753     return DAG.getNode(ISD::ADDC, DL, N->getVTList(), N1, N0);
2754
2755   // fold (addc x, 0) -> x + no carry out
2756   if (isNullConstant(N1))
2757     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
2758                                         DL, MVT::Glue));
2759
2760   // If it cannot overflow, transform into an add.
2761   if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2762     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2763                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
2764
2765   return SDValue();
2766 }
2767
2768 static SDValue flipBoolean(SDValue V, const SDLoc &DL,
2769                            SelectionDAG &DAG, const TargetLowering &TLI) {
2770   EVT VT = V.getValueType();
2771
2772   SDValue Cst;
2773   switch (TLI.getBooleanContents(VT)) {
2774   case TargetLowering::ZeroOrOneBooleanContent:
2775   case TargetLowering::UndefinedBooleanContent:
2776     Cst = DAG.getConstant(1, DL, VT);
2777     break;
2778   case TargetLowering::ZeroOrNegativeOneBooleanContent:
2779     Cst = DAG.getAllOnesConstant(DL, VT);
2780     break;
2781   }
2782
2783   return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
2784 }
2785
2786 /**
2787  * Flips a boolean if it is cheaper to compute. If the Force parameters is set,
2788  * then the flip also occurs if computing the inverse is the same cost.
2789  * This function returns an empty SDValue in case it cannot flip the boolean
2790  * without increasing the cost of the computation. If you want to flip a boolean
2791  * no matter what, use flipBoolean.
2792  */
2793 static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
2794                                   const TargetLowering &TLI,
2795                                   bool Force) {
2796   if (Force && isa<ConstantSDNode>(V))
2797     return flipBoolean(V, SDLoc(V), DAG, TLI);
2798
2799   if (V.getOpcode() != ISD::XOR)
2800     return SDValue();
2801
2802   ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
2803   if (!Const)
2804     return SDValue();
2805
2806   EVT VT = V.getValueType();
2807
2808   bool IsFlip = false;
2809   switch(TLI.getBooleanContents(VT)) {
2810     case TargetLowering::ZeroOrOneBooleanContent:
2811       IsFlip = Const->isOne();
2812       break;
2813     case TargetLowering::ZeroOrNegativeOneBooleanContent:
2814       IsFlip = Const->isAllOnesValue();
2815       break;
2816     case TargetLowering::UndefinedBooleanContent:
2817       IsFlip = (Const->getAPIntValue() & 0x01) == 1;
2818       break;
2819   }
2820
2821   if (IsFlip)
2822     return V.getOperand(0);
2823   if (Force)
2824     return flipBoolean(V, SDLoc(V), DAG, TLI);
2825   return SDValue();
2826 }
2827
2828 SDValue DAGCombiner::visitADDO(SDNode *N) {
2829   SDValue N0 = N->getOperand(0);
2830   SDValue N1 = N->getOperand(1);
2831   EVT VT = N0.getValueType();
2832   bool IsSigned = (ISD::SADDO == N->getOpcode());
2833
2834   EVT CarryVT = N->getValueType(1);
2835   SDLoc DL(N);
2836
2837   // If the flag result is dead, turn this into an ADD.
2838   if (!N->hasAnyUseOfValue(1))
2839     return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2840                      DAG.getUNDEF(CarryVT));
2841
2842   // canonicalize constant to RHS.
2843   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
2844       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
2845     return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
2846
2847   // fold (addo x, 0) -> x + no carry out
2848   if (isNullOrNullSplat(N1))
2849     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
2850
2851   if (!IsSigned) {
2852     // If it cannot overflow, transform into an add.
2853     if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
2854       return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
2855                        DAG.getConstant(0, DL, CarryVT));
2856
2857     // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
2858     if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
2859       SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
2860                                 DAG.getConstant(0, DL, VT), N0.getOperand(0));
2861       return CombineTo(N, Sub,
2862                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
2863     }
2864
2865     if (SDValue Combined = visitUADDOLike(N0, N1, N))
2866       return Combined;
2867
2868     if (SDValue Combined = visitUADDOLike(N1, N0, N))
2869       return Combined;
2870   }
2871
2872   return SDValue();
2873 }
2874
2875 SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
2876   EVT VT = N0.getValueType();
2877   if (VT.isVector())
2878     return SDValue();
2879
2880   // (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
2881   // If Y + 1 cannot overflow.
2882   if (N1.getOpcode() == ISD::ADDCARRY && isNullConstant(N1.getOperand(1))) {
2883     SDValue Y = N1.getOperand(0);
2884     SDValue One = DAG.getConstant(1, SDLoc(N), Y.getValueType());
2885     if (DAG.computeOverflowKind(Y, One) == SelectionDAG::OFK_Never)
2886       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0, Y,
2887                          N1.getOperand(2));
2888   }
2889
2890   // (uaddo X, Carry) -> (addcarry X, 0, Carry)
2891   if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT))
2892     if (SDValue Carry = getAsCarry(TLI, N1))
2893       return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
2894                          DAG.getConstant(0, SDLoc(N), VT), Carry);
2895
2896   return SDValue();
2897 }
2898
2899 SDValue DAGCombiner::visitADDE(SDNode *N) {
2900   SDValue N0 = N->getOperand(0);
2901   SDValue N1 = N->getOperand(1);
2902   SDValue CarryIn = N->getOperand(2);
2903
2904   // canonicalize constant to RHS
2905   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2906   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2907   if (N0C && !N1C)
2908     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
2909                        N1, N0, CarryIn);
2910
2911   // fold (adde x, y, false) -> (addc x, y)
2912   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
2913     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
2914
2915   return SDValue();
2916 }
2917
2918 SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
2919   SDValue N0 = N->getOperand(0);
2920   SDValue N1 = N->getOperand(1);
2921   SDValue CarryIn = N->getOperand(2);
2922   SDLoc DL(N);
2923
2924   // canonicalize constant to RHS
2925   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2926   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2927   if (N0C && !N1C)
2928     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), N1, N0, CarryIn);
2929
2930   // fold (addcarry x, y, false) -> (uaddo x, y)
2931   if (isNullConstant(CarryIn)) {
2932     if (!LegalOperations ||
2933         TLI.isOperationLegalOrCustom(ISD::UADDO, N->getValueType(0)))
2934       return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
2935   }
2936
2937   // fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
2938   if (isNullConstant(N0) && isNullConstant(N1)) {
2939     EVT VT = N0.getValueType();
2940     EVT CarryVT = CarryIn.getValueType();
2941     SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
2942     AddToWorklist(CarryExt.getNode());
2943     return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
2944                                     DAG.getConstant(1, DL, VT)),
2945                      DAG.getConstant(0, DL, CarryVT));
2946   }
2947
2948   if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
2949     return Combined;
2950
2951   if (SDValue Combined = visitADDCARRYLike(N1, N0, CarryIn, N))
2952     return Combined;
2953
2954   return SDValue();
2955 }
2956
2957 /**
2958  * If we are facing some sort of diamond carry propapagtion pattern try to
2959  * break it up to generate something like:
2960  *   (addcarry X, 0, (addcarry A, B, Z):Carry)
2961  *
2962  * The end result is usually an increase in operation required, but because the
2963  * carry is now linearized, other tranforms can kick in and optimize the DAG.
2964  *
2965  * Patterns typically look something like
2966  *            (uaddo A, B)
2967  *             /       \
2968  *          Carry      Sum
2969  *            |          \
2970  *            | (addcarry *, 0, Z)
2971  *            |       /
2972  *             \   Carry
2973  *              |   /
2974  * (addcarry X, *, *)
2975  *
2976  * But numerous variation exist. Our goal is to identify A, B, X and Z and
2977  * produce a combine with a single path for carry propagation.
2978  */
2979 static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
2980                                       SDValue X, SDValue Carry0, SDValue Carry1,
2981                                       SDNode *N) {
2982   if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
2983     return SDValue();
2984   if (Carry1.getOpcode() != ISD::UADDO)
2985     return SDValue();
2986
2987   SDValue Z;
2988
2989   /**
2990    * First look for a suitable Z. It will present itself in the form of
2991    * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
2992    */
2993   if (Carry0.getOpcode() == ISD::ADDCARRY &&
2994       isNullConstant(Carry0.getOperand(1))) {
2995     Z = Carry0.getOperand(2);
2996   } else if (Carry0.getOpcode() == ISD::UADDO &&
2997              isOneConstant(Carry0.getOperand(1))) {
2998     EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
2999     Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
3000   } else {
3001     // We couldn't find a suitable Z.
3002     return SDValue();
3003   }
3004
3005
3006   auto cancelDiamond = [&](SDValue A,SDValue B) {
3007     SDLoc DL(N);
3008     SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
3009     Combiner.AddToWorklist(NewY.getNode());
3010     return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
3011                        DAG.getConstant(0, DL, X.getValueType()),
3012                        NewY.getValue(1));
3013   };
3014
3015   /**
3016    *      (uaddo A, B)
3017    *           |
3018    *          Sum
3019    *           |
3020    * (addcarry *, 0, Z)
3021    */
3022   if (Carry0.getOperand(0) == Carry1.getValue(0)) {
3023     return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
3024   }
3025
3026   /**
3027    * (addcarry A, 0, Z)
3028    *         |
3029    *        Sum
3030    *         |
3031    *  (uaddo *, B)
3032    */
3033   if (Carry1.getOperand(0) == Carry0.getValue(0)) {
3034     return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
3035   }
3036
3037   if (Carry1.getOperand(1) == Carry0.getValue(0)) {
3038     return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
3039   }
3040
3041   return SDValue();
3042 }
3043
3044 SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
3045                                        SDNode *N) {
3046   // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
3047   if (isBitwiseNot(N0))
3048     if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
3049       SDLoc DL(N);
3050       SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
3051                                 N0.getOperand(0), NotC);
3052       return CombineTo(N, Sub,
3053                        flipBoolean(Sub.getValue(1), DL, DAG, TLI));
3054     }
3055
3056   // Iff the flag result is dead:
3057   // (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
3058   // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
3059   // or the dependency between the instructions.
3060   if ((N0.getOpcode() == ISD::ADD ||
3061        (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
3062         N0.getValue(1) != CarryIn)) &&
3063       isNullConstant(N1) && !N->hasAnyUseOfValue(1))
3064     return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
3065                        N0.getOperand(0), N0.getOperand(1), CarryIn);
3066
3067   /**
3068    * When one of the addcarry argument is itself a carry, we may be facing
3069    * a diamond carry propagation. In which case we try to transform the DAG
3070    * to ensure linear carry propagation if that is possible.
3071    */
3072   if (auto Y = getAsCarry(TLI, N1)) {
3073     // Because both are carries, Y and Z can be swapped.
3074     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
3075       return R;
3076     if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
3077       return R;
3078   }
3079
3080   return SDValue();
3081 }
3082
3083 // Since it may not be valid to emit a fold to zero for vector initializers
3084 // check if we can before folding.
3085 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
3086                              SelectionDAG &DAG, bool LegalOperations) {
3087   if (!VT.isVector())
3088     return DAG.getConstant(0, DL, VT);
3089   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
3090     return DAG.getConstant(0, DL, VT);
3091   return SDValue();
3092 }
3093
3094 SDValue DAGCombiner::visitSUB(SDNode *N) {
3095   SDValue N0 = N->getOperand(0);
3096   SDValue N1 = N->getOperand(1);
3097   EVT VT = N0.getValueType();
3098   SDLoc DL(N);
3099
3100   // fold vector ops
3101   if (VT.isVector()) {
3102     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3103       return FoldedVOp;
3104
3105     // fold (sub x, 0) -> x, vector edition
3106     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3107       return N0;
3108   }
3109
3110   // fold (sub x, x) -> 0
3111   // FIXME: Refactor this and xor and other similar operations together.
3112   if (N0 == N1)
3113     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
3114   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3115       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3116     // fold (sub c1, c2) -> c1-c2
3117     return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3118                                       N1.getNode());
3119   }
3120
3121   if (SDValue NewSel = foldBinOpIntoSelect(N))
3122     return NewSel;
3123
3124   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3125
3126   // fold (sub x, c) -> (add x, -c)
3127   if (N1C) {
3128     return DAG.getNode(ISD::ADD, DL, VT, N0,
3129                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3130   }
3131
3132   if (isNullOrNullSplat(N0)) {
3133     unsigned BitWidth = VT.getScalarSizeInBits();
3134     // Right-shifting everything out but the sign bit followed by negation is
3135     // the same as flipping arithmetic/logical shift type without the negation:
3136     // -(X >>u 31) -> (X >>s 31)
3137     // -(X >>s 31) -> (X >>u 31)
3138     if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
3139       ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
3140       if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
3141         auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
3142         if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
3143           return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
3144       }
3145     }
3146
3147     // 0 - X --> 0 if the sub is NUW.
3148     if (N->getFlags().hasNoUnsignedWrap())
3149       return N0;
3150
3151     if (DAG.MaskedValueIsZero(N1, ~APInt::getSignMask(BitWidth))) {
3152       // N1 is either 0 or the minimum signed value. If the sub is NSW, then
3153       // N1 must be 0 because negating the minimum signed value is undefined.
3154       if (N->getFlags().hasNoSignedWrap())
3155         return N0;
3156
3157       // 0 - X --> X if X is 0 or the minimum signed value.
3158       return N1;
3159     }
3160   }
3161
3162   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
3163   if (isAllOnesOrAllOnesSplat(N0))
3164     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
3165
3166   // fold (A - (0-B)) -> A+B
3167   if (N1.getOpcode() == ISD::SUB && isNullOrNullSplat(N1.getOperand(0)))
3168     return DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(1));
3169
3170   // fold A-(A-B) -> B
3171   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
3172     return N1.getOperand(1);
3173
3174   // fold (A+B)-A -> B
3175   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
3176     return N0.getOperand(1);
3177
3178   // fold (A+B)-B -> A
3179   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
3180     return N0.getOperand(0);
3181
3182   // fold (A+C1)-C2 -> A+(C1-C2)
3183   if (N0.getOpcode() == ISD::ADD &&
3184       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3185       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3186     SDValue NewC = DAG.FoldConstantArithmetic(
3187         ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3188     assert(NewC && "Constant folding failed");
3189     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
3190   }
3191
3192   // fold C2-(A+C1) -> (C2-C1)-A
3193   if (N1.getOpcode() == ISD::ADD) {
3194     SDValue N11 = N1.getOperand(1);
3195     if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
3196         isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
3197       SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
3198                                                 N11.getNode());
3199       assert(NewC && "Constant folding failed");
3200       return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
3201     }
3202   }
3203
3204   // fold (A-C1)-C2 -> A-(C1+C2)
3205   if (N0.getOpcode() == ISD::SUB &&
3206       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3207       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3208     SDValue NewC = DAG.FoldConstantArithmetic(
3209         ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
3210     assert(NewC && "Constant folding failed");
3211     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
3212   }
3213
3214   // fold (c1-A)-c2 -> (c1-c2)-A
3215   if (N0.getOpcode() == ISD::SUB &&
3216       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3217       isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
3218     SDValue NewC = DAG.FoldConstantArithmetic(
3219         ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
3220     assert(NewC && "Constant folding failed");
3221     return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
3222   }
3223
3224   // fold ((A+(B+or-C))-B) -> A+or-C
3225   if (N0.getOpcode() == ISD::ADD &&
3226       (N0.getOperand(1).getOpcode() == ISD::SUB ||
3227        N0.getOperand(1).getOpcode() == ISD::ADD) &&
3228       N0.getOperand(1).getOperand(0) == N1)
3229     return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
3230                        N0.getOperand(1).getOperand(1));
3231
3232   // fold ((A+(C+B))-B) -> A+C
3233   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
3234       N0.getOperand(1).getOperand(1) == N1)
3235     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
3236                        N0.getOperand(1).getOperand(0));
3237
3238   // fold ((A-(B-C))-C) -> A-B
3239   if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
3240       N0.getOperand(1).getOperand(1) == N1)
3241     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
3242                        N0.getOperand(1).getOperand(0));
3243
3244   // fold (A-(B-C)) -> A+(C-B)
3245   if (N1.getOpcode() == ISD::SUB && N1.hasOneUse())
3246     return DAG.getNode(ISD::ADD, DL, VT, N0,
3247                        DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(1),
3248                                    N1.getOperand(0)));
3249
3250   // fold (X - (-Y * Z)) -> (X + (Y * Z))
3251   if (N1.getOpcode() == ISD::MUL && N1.hasOneUse()) {
3252     if (N1.getOperand(0).getOpcode() == ISD::SUB &&
3253         isNullOrNullSplat(N1.getOperand(0).getOperand(0))) {
3254       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3255                                 N1.getOperand(0).getOperand(1),
3256                                 N1.getOperand(1));
3257       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3258     }
3259     if (N1.getOperand(1).getOpcode() == ISD::SUB &&
3260         isNullOrNullSplat(N1.getOperand(1).getOperand(0))) {
3261       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT,
3262                                 N1.getOperand(0),
3263                                 N1.getOperand(1).getOperand(1));
3264       return DAG.getNode(ISD::ADD, DL, VT, N0, Mul);
3265     }
3266   }
3267
3268   // If either operand of a sub is undef, the result is undef
3269   if (N0.isUndef())
3270     return N0;
3271   if (N1.isUndef())
3272     return N1;
3273
3274   if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
3275     return V;
3276
3277   if (SDValue V = foldAddSubOfSignBit(N, DAG))
3278     return V;
3279
3280   if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
3281     return V;
3282
3283   // (x - y) - 1  ->  add (xor y, -1), x
3284   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
3285     SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
3286                               DAG.getAllOnesConstant(DL, VT));
3287     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
3288   }
3289
3290   // Look for:
3291   //   sub y, (xor x, -1)
3292   // And if the target does not like this form then turn into:
3293   //   add (add x, y), 1
3294   if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
3295     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
3296     return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
3297   }
3298
3299   // Hoist one-use addition by non-opaque constant:
3300   //   (x + C) - y  ->  (x - y) + C
3301   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
3302       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3303     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3304     return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
3305   }
3306   // y - (x + C)  ->  (y - x) - C
3307   if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
3308       isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
3309     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
3310     return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
3311   }
3312   // (x - C) - y  ->  (x - y) - C
3313   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
3314   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3315       isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
3316     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
3317     return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
3318   }
3319   // (C - x) - y  ->  C - (x + y)
3320   if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
3321       isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
3322     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
3323     return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
3324   }
3325
3326   // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
3327   // rather than 'sub 0/1' (the sext should get folded).
3328   // sub X, (zext i1 Y) --> add X, (sext i1 Y)
3329   if (N1.getOpcode() == ISD::ZERO_EXTEND &&
3330       N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
3331       TLI.getBooleanContents(VT) ==
3332           TargetLowering::ZeroOrNegativeOneBooleanContent) {
3333     SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
3334     return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
3335   }
3336
3337   // fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
3338   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
3339     if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
3340       SDValue X0 = N0.getOperand(0), X1 = N0.getOperand(1);
3341       SDValue S0 = N1.getOperand(0);
3342       if ((X0 == S0 && X1 == N1) || (X0 == N1 && X1 == S0)) {
3343         unsigned OpSizeInBits = VT.getScalarSizeInBits();
3344         if (ConstantSDNode *C = isConstOrConstSplat(N1.getOperand(1)))
3345           if (C->getAPIntValue() == (OpSizeInBits - 1))
3346             return DAG.getNode(ISD::ABS, SDLoc(N), VT, S0);
3347       }
3348     }
3349   }
3350
3351   // If the relocation model supports it, consider symbol offsets.
3352   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
3353     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
3354       // fold (sub Sym, c) -> Sym-c
3355       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
3356         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
3357                                     GA->getOffset() -
3358                                         (uint64_t)N1C->getSExtValue());
3359       // fold (sub Sym+c1, Sym+c2) -> c1-c2
3360       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
3361         if (GA->getGlobal() == GB->getGlobal())
3362           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
3363                                  DL, VT);
3364     }
3365
3366   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
3367   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
3368     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
3369     if (TN->getVT() == MVT::i1) {
3370       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
3371                                  DAG.getConstant(1, DL, VT));
3372       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
3373     }
3374   }
3375
3376   // Prefer an add for more folding potential and possibly better codegen:
3377   // sub N0, (lshr N10, width-1) --> add N0, (ashr N10, width-1)
3378   if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
3379     SDValue ShAmt = N1.getOperand(1);
3380     ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
3381     if (ShAmtC &&
3382         ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
3383       SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
3384       return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
3385     }
3386   }
3387
3388   return SDValue();
3389 }
3390
3391 SDValue DAGCombiner::visitSUBSAT(SDNode *N) {
3392   SDValue N0 = N->getOperand(0);
3393   SDValue N1 = N->getOperand(1);
3394   EVT VT = N0.getValueType();
3395   SDLoc DL(N);
3396
3397   // fold vector ops
3398   if (VT.isVector()) {
3399     // TODO SimplifyVBinOp
3400
3401     // fold (sub_sat x, 0) -> x, vector edition
3402     if (ISD::isBuildVectorAllZeros(N1.getNode()))
3403       return N0;
3404   }
3405
3406   // fold (sub_sat x, undef) -> 0
3407   if (N0.isUndef() || N1.isUndef())
3408     return DAG.getConstant(0, DL, VT);
3409
3410   // fold (sub_sat x, x) -> 0
3411   if (N0 == N1)
3412     return DAG.getConstant(0, DL, VT);
3413
3414   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3415       DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
3416     // fold (sub_sat c1, c2) -> c3
3417     return DAG.FoldConstantArithmetic(N->getOpcode(), DL, VT, N0.getNode(),
3418                                       N1.getNode());
3419   }
3420
3421   // fold (sub_sat x, 0) -> x
3422   if (isNullConstant(N1))
3423     return N0;
3424
3425   return SDValue();
3426 }
3427
3428 SDValue DAGCombiner::visitSUBC(SDNode *N) {
3429   SDValue N0 = N->getOperand(0);
3430   SDValue N1 = N->getOperand(1);
3431   EVT VT = N0.getValueType();
3432   SDLoc DL(N);
3433
3434   // If the flag result is dead, turn this into an SUB.
3435   if (!N->hasAnyUseOfValue(1))
3436     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3437                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3438
3439   // fold (subc x, x) -> 0 + no borrow
3440   if (N0 == N1)
3441     return CombineTo(N, DAG.getConstant(0, DL, VT),
3442                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3443
3444   // fold (subc x, 0) -> x + no borrow
3445   if (isNullConstant(N1))
3446     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3447
3448   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3449   if (isAllOnesConstant(N0))
3450     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3451                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
3452
3453   return SDValue();
3454 }
3455
3456 SDValue DAGCombiner::visitSUBO(SDNode *N) {
3457   SDValue N0 = N->getOperand(0);
3458   SDValue N1 = N->getOperand(1);
3459   EVT VT = N0.getValueType();
3460   bool IsSigned = (ISD::SSUBO == N->getOpcode());
3461
3462   EVT CarryVT = N->getValueType(1);
3463   SDLoc DL(N);
3464
3465   // If the flag result is dead, turn this into an SUB.
3466   if (!N->hasAnyUseOfValue(1))
3467     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
3468                      DAG.getUNDEF(CarryVT));
3469
3470   // fold (subo x, x) -> 0 + no borrow
3471   if (N0 == N1)
3472     return CombineTo(N, DAG.getConstant(0, DL, VT),
3473                      DAG.getConstant(0, DL, CarryVT));
3474
3475   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
3476
3477   // fold (subox, c) -> (addo x, -c)
3478   if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
3479     return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
3480                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
3481   }
3482
3483   // fold (subo x, 0) -> x + no borrow
3484   if (isNullOrNullSplat(N1))
3485     return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
3486
3487   // Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
3488   if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
3489     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
3490                      DAG.getConstant(0, DL, CarryVT));
3491
3492   return SDValue();
3493 }
3494
3495 SDValue DAGCombiner::visitSUBE(SDNode *N) {
3496   SDValue N0 = N->getOperand(0);
3497   SDValue N1 = N->getOperand(1);
3498   SDValue CarryIn = N->getOperand(2);
3499
3500   // fold (sube x, y, false) -> (subc x, y)
3501   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
3502     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
3503
3504   return SDValue();
3505 }
3506
3507 SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
3508   SDValue N0 = N->getOperand(0);
3509   SDValue N1 = N->getOperand(1);
3510   SDValue CarryIn = N->getOperand(2);
3511
3512   // fold (subcarry x, y, false) -> (usubo x, y)
3513   if (isNullConstant(CarryIn)) {
3514     if (!LegalOperations ||
3515         TLI.isOperationLegalOrCustom(ISD::USUBO, N->getValueType(0)))
3516       return DAG.getNode(ISD::USUBO, SDLoc(N), N->getVTList(), N0, N1);
3517   }
3518
3519   return SDValue();
3520 }
3521
3522 // Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT and UMULFIX here.
3523 SDValue DAGCombiner::visitMULFIX(SDNode *N) {
3524   SDValue N0 = N->getOperand(0);
3525   SDValue N1 = N->getOperand(1);
3526   SDValue Scale = N->getOperand(2);
3527   EVT VT = N0.getValueType();
3528
3529   // fold (mulfix x, undef, scale) -> 0
3530   if (N0.isUndef() || N1.isUndef())
3531     return DAG.getConstant(0, SDLoc(N), VT);
3532
3533   // Canonicalize constant to RHS (vector doesn't have to splat)
3534   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3535      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3536     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
3537
3538   // fold (mulfix x, 0, scale) -> 0
3539   if (isNullConstant(N1))
3540     return DAG.getConstant(0, SDLoc(N), VT);
3541
3542   return SDValue();
3543 }
3544
3545 SDValue DAGCombiner::visitMUL(SDNode *N) {
3546   SDValue N0 = N->getOperand(0);
3547   SDValue N1 = N->getOperand(1);
3548   EVT VT = N0.getValueType();
3549
3550   // fold (mul x, undef) -> 0
3551   if (N0.isUndef() || N1.isUndef())
3552     return DAG.getConstant(0, SDLoc(N), VT);
3553
3554   bool N0IsConst = false;
3555   bool N1IsConst = false;
3556   bool N1IsOpaqueConst = false;
3557   bool N0IsOpaqueConst = false;
3558   APInt ConstValue0, ConstValue1;
3559   // fold vector ops
3560   if (VT.isVector()) {
3561     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3562       return FoldedVOp;
3563
3564     N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
3565     N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
3566     assert((!N0IsConst ||
3567             ConstValue0.getBitWidth() == VT.getScalarSizeInBits()) &&
3568            "Splat APInt should be element width");
3569     assert((!N1IsConst ||
3570             ConstValue1.getBitWidth() == VT.getScalarSizeInBits()) &&
3571            "Splat APInt should be element width");
3572   } else {
3573     N0IsConst = isa<ConstantSDNode>(N0);
3574     if (N0IsConst) {
3575       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
3576       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
3577     }
3578     N1IsConst = isa<ConstantSDNode>(N1);
3579     if (N1IsConst) {
3580       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
3581       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
3582     }
3583   }
3584
3585   // fold (mul c1, c2) -> c1*c2
3586   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
3587     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
3588                                       N0.getNode(), N1.getNode());
3589
3590   // canonicalize constant to RHS (vector doesn't have to splat)
3591   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
3592      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
3593     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
3594   // fold (mul x, 0) -> 0
3595   if (N1IsConst && ConstValue1.isNullValue())
3596     return N1;
3597   // fold (mul x, 1) -> x
3598   if (N1IsConst && ConstValue1.isOneValue())
3599     return N0;
3600
3601   if (SDValue NewSel = foldBinOpIntoSelect(N))
3602     return NewSel;
3603
3604   // fold (mul x, -1) -> 0-x
3605   if (N1IsConst && ConstValue1.isAllOnesValue()) {
3606     SDLoc DL(N);
3607     return DAG.getNode(ISD::SUB, DL, VT,
3608                        DAG.getConstant(0, DL, VT), N0);
3609   }
3610   // fold (mul x, (1 << c)) -> x << c
3611   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
3612       DAG.isKnownToBeAPowerOfTwo(N1) &&
3613       (!VT.isVector() || Level <= AfterLegalizeVectorOps)) {
3614     SDLoc DL(N);
3615     SDValue LogBase2 = BuildLogBase2(N1, DL);
3616     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
3617     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
3618     return DAG.getNode(ISD::SHL, DL, VT, N0, Trunc);
3619   }
3620   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
3621   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2()) {
3622     unsigned Log2Val = (-ConstValue1).logBase2();
3623     SDLoc DL(N);
3624     // FIXME: If the input is something that is easily negated (e.g. a
3625     // single-use add), we should put the negate there.
3626     return DAG.getNode(ISD::SUB, DL, VT,
3627                        DAG.getConstant(0, DL, VT),
3628                        DAG.getNode(ISD::SHL, DL, VT, N0,
3629                             DAG.getConstant(Log2Val, DL,
3630                                       getShiftAmountTy(N0.getValueType()))));
3631   }
3632
3633   // Try to transform multiply-by-(power-of-2 +/- 1) into shift and add/sub.
3634   // mul x, (2^N + 1) --> add (shl x, N), x
3635   // mul x, (2^N - 1) --> sub (shl x, N), x
3636   // Examples: x * 33 --> (x << 5) + x
3637   //           x * 15 --> (x << 4) - x
3638   //           x * -33 --> -((x << 5) + x)
3639   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
3640   if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
3641     // TODO: We could handle more general decomposition of any constant by
3642     //       having the target set a limit on number of ops and making a
3643     //       callback to determine that sequence (similar to sqrt expansion).
3644     unsigned MathOp = ISD::DELETED_NODE;
3645     APInt MulC = ConstValue1.abs();
3646     if ((MulC - 1).isPowerOf2())
3647       MathOp = ISD::ADD;
3648     else if ((MulC + 1).isPowerOf2())
3649       MathOp = ISD::SUB;
3650
3651     if (MathOp != ISD::DELETED_NODE) {
3652       unsigned ShAmt =
3653           MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
3654       assert(ShAmt < VT.getScalarSizeInBits() &&
3655              "multiply-by-constant generated out of bounds shift");
3656       SDLoc DL(N);
3657       SDValue Shl =
3658           DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
3659       SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
3660       if (ConstValue1.isNegative())
3661         R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
3662       return R;
3663     }
3664   }
3665
3666   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
3667   if (N0.getOpcode() == ISD::SHL &&
3668       isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
3669       isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
3670     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
3671     if (isConstantOrConstantVector(C3))
3672       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
3673   }
3674
3675   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
3676   // use.
3677   {
3678     SDValue Sh(nullptr, 0), Y(nullptr, 0);
3679
3680     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
3681     if (N0.getOpcode() == ISD::SHL &&
3682         isConstantOrConstantVector(N0.getOperand(1)) &&
3683         N0.getNode()->hasOneUse()) {
3684       Sh = N0; Y = N1;
3685     } else if (N1.getOpcode() == ISD::SHL &&
3686                isConstantOrConstantVector(N1.getOperand(1)) &&
3687                N1.getNode()->hasOneUse()) {
3688       Sh = N1; Y = N0;
3689     }
3690
3691     if (Sh.getNode()) {
3692       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
3693       return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
3694     }
3695   }
3696
3697   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
3698   if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
3699       N0.getOpcode() == ISD::ADD &&
3700       DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
3701       isMulAddWithConstProfitable(N, N0, N1))
3702       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
3703                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
3704                                      N0.getOperand(0), N1),
3705                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
3706                                      N0.getOperand(1), N1));
3707
3708   // reassociate mul
3709   if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
3710     return RMUL;
3711
3712   return SDValue();
3713 }
3714
3715 /// Return true if divmod libcall is available.
3716 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
3717                                      const TargetLowering &TLI) {
3718   RTLIB::Libcall LC;
3719   EVT NodeType = Node->getValueType(0);
3720   if (!NodeType.isSimple())
3721     return false;
3722   switch (NodeType.getSimpleVT().SimpleTy) {
3723   default: return false; // No libcall for vector types.
3724   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
3725   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
3726   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
3727   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
3728   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
3729   }
3730
3731   return TLI.getLibcallName(LC) != nullptr;
3732 }
3733
3734 /// Issue divrem if both quotient and remainder are needed.
3735 SDValue DAGCombiner::useDivRem(SDNode *Node) {
3736   if (Node->use_empty())
3737     return SDValue(); // This is a dead node, leave it alone.
3738
3739   unsigned Opcode = Node->getOpcode();
3740   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
3741   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
3742
3743   // DivMod lib calls can still work on non-legal types if using lib-calls.
3744   EVT VT = Node->getValueType(0);
3745   if (VT.isVector() || !VT.isInteger())
3746     return SDValue();
3747
3748   if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
3749     return SDValue();
3750
3751   // If DIVREM is going to get expanded into a libcall,
3752   // but there is no libcall available, then don't combine.
3753   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
3754       !isDivRemLibcallAvailable(Node, isSigned, TLI))
3755     return SDValue();
3756
3757   // If div is legal, it's better to do the normal expansion
3758   unsigned OtherOpcode = 0;
3759   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
3760     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
3761     if (TLI.isOperationLegalOrCustom(Opcode, VT))
3762       return SDValue();
3763   } else {
3764     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
3765     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
3766       return SDValue();
3767   }
3768
3769   SDValue Op0 = Node->getOperand(0);
3770   SDValue Op1 = Node->getOperand(1);
3771   SDValue combined;
3772   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
3773          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
3774     SDNode *User = *UI;
3775     if (User == Node || User->getOpcode() == ISD::DELETED_NODE ||
3776         User->use_empty())
3777       continue;
3778     // Convert the other matching node(s), too;
3779     // otherwise, the DIVREM may get target-legalized into something
3780     // target-specific that we won't be able to recognize.
3781     unsigned UserOpc = User->getOpcode();
3782     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
3783         User->getOperand(0) == Op0 &&
3784         User->getOperand(1) == Op1) {
3785       if (!combined) {
3786         if (UserOpc == OtherOpcode) {
3787           SDVTList VTs = DAG.getVTList(VT, VT);
3788           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
3789         } else if (UserOpc == DivRemOpc) {
3790           combined = SDValue(User, 0);
3791         } else {
3792           assert(UserOpc == Opcode);
3793           continue;
3794         }
3795       }
3796       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
3797         CombineTo(User, combined);
3798       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
3799         CombineTo(User, combined.getValue(1));
3800     }
3801   }
3802   return combined;
3803 }
3804
3805 static SDValue simplifyDivRem(SDNode *N, SelectionDAG &DAG) {
3806   SDValue N0 = N->getOperand(0);
3807   SDValue N1 = N->getOperand(1);
3808   EVT VT = N->getValueType(0);
3809   SDLoc DL(N);
3810
3811   unsigned Opc = N->getOpcode();
3812   bool IsDiv = (ISD::SDIV == Opc) || (ISD::UDIV == Opc);
3813   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3814
3815   // X / undef -> undef
3816   // X % undef -> undef
3817   // X / 0 -> undef
3818   // X % 0 -> undef
3819   // NOTE: This includes vectors where any divisor element is zero/undef.
3820   if (DAG.isUndef(Opc, {N0, N1}))
3821     return DAG.getUNDEF(VT);
3822
3823   // undef / X -> 0
3824   // undef % X -> 0
3825   if (N0.isUndef())
3826     return DAG.getConstant(0, DL, VT);
3827
3828   // 0 / X -> 0
3829   // 0 % X -> 0
3830   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3831   if (N0C && N0C->isNullValue())
3832     return N0;
3833
3834   // X / X -> 1
3835   // X % X -> 0
3836   if (N0 == N1)
3837     return DAG.getConstant(IsDiv ? 1 : 0, DL, VT);
3838
3839   // X / 1 -> X
3840   // X % 1 -> 0
3841   // If this is a boolean op (single-bit element type), we can't have
3842   // division-by-zero or remainder-by-zero, so assume the divisor is 1.
3843   // TODO: Similarly, if we're zero-extending a boolean divisor, then assume
3844   // it's a 1.
3845   if ((N1C && N1C->isOne()) || (VT.getScalarType() == MVT::i1))
3846     return IsDiv ? N0 : DAG.getConstant(0, DL, VT);
3847
3848   return SDValue();
3849 }
3850
3851 SDValue DAGCombiner::visitSDIV(SDNode *N) {
3852   SDValue N0 = N->getOperand(0);
3853   SDValue N1 = N->getOperand(1);
3854   EVT VT = N->getValueType(0);
3855   EVT CCVT = getSetCCResultType(VT);
3856
3857   // fold vector ops
3858   if (VT.isVector())
3859     if (SDValue FoldedVOp = SimplifyVBinOp(N))
3860       return FoldedVOp;
3861
3862   SDLoc DL(N);
3863
3864   // fold (sdiv c1, c2) -> c1/c2
3865   ConstantSDNode *N0C = isConstOrConstSplat(N0);
3866   ConstantSDNode *N1C = isConstOrConstSplat(N1);
3867   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
3868     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
3869   // fold (sdiv X, -1) -> 0-X
3870   if (N1C && N1C->isAllOnesValue())
3871     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), N0);
3872   // fold (sdiv X, MIN_SIGNED) -> select(X == MIN_SIGNED, 1, 0)
3873   if (N1C && N1C->getAPIntValue().isMinSignedValue())
3874     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
3875                          DAG.getConstant(1, DL, VT),
3876                          DAG.getConstant(0, DL, VT));
3877
3878   if (SDValue V = simplifyDivRem(N, DAG))
3879     return V;
3880
3881   if (SDValue NewSel = foldBinOpIntoSelect(N))
3882     return NewSel;
3883
3884   // If we know the sign bits of both operands are zero, strength reduce to a
3885   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
3886   if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
3887     return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
3888
3889   if (SDValue V = visitSDIVLike(N0, N1, N)) {
3890     // If the corresponding remainder node exists, update its users with
3891     // (Dividend - (Quotient * Divisor).
3892     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::SREM, N->getVTList(),
3893                                               { N0, N1 })) {
3894       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
3895       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
3896       AddToWorklist(Mul.getNode());
3897       AddToWorklist(Sub.getNode());
3898       CombineTo(RemNode, Sub);
3899     }
3900     return V;
3901   }
3902
3903   // sdiv, srem -> sdivrem
3904   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
3905   // true.  Otherwise, we break the simplification logic in visitREM().
3906   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3907   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
3908     if (SDValue DivRem = useDivRem(N))
3909         return DivRem;
3910
3911   return SDValue();
3912 }
3913
3914 SDValue DAGCombiner::visitSDIVLike(SDValue N0, SDValue N1, SDNode *N) {
3915   SDLoc DL(N);
3916   EVT VT = N->getValueType(0);
3917   EVT CCVT = getSetCCResultType(VT);
3918   unsigned BitWidth = VT.getScalarSizeInBits();
3919
3920   // Helper for determining whether a value is a power-2 constant scalar or a
3921   // vector of such elements.
3922   auto IsPowerOfTwo = [](ConstantSDNode *C) {
3923     if (C->isNullValue() || C->isOpaque())
3924       return false;
3925     if (C->getAPIntValue().isPowerOf2())
3926       return true;
3927     if ((-C->getAPIntValue()).isPowerOf2())
3928       return true;
3929     return false;
3930   };
3931
3932   // fold (sdiv X, pow2) -> simple ops after legalize
3933   // FIXME: We check for the exact bit here because the generic lowering gives
3934   // better results in that case. The target-specific lowering should learn how
3935   // to handle exact sdivs efficiently.
3936   if (!N->getFlags().hasExact() && ISD::matchUnaryPredicate(N1, IsPowerOfTwo)) {
3937     // Target-specific implementation of sdiv x, pow2.
3938     if (SDValue Res = BuildSDIVPow2(N))
3939       return Res;
3940
3941     // Create constants that are functions of the shift amount value.
3942     EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
3943     SDValue Bits = DAG.getConstant(BitWidth, DL, ShiftAmtTy);
3944     SDValue C1 = DAG.getNode(ISD::CTTZ, DL, VT, N1);
3945     C1 = DAG.getZExtOrTrunc(C1, DL, ShiftAmtTy);
3946     SDValue Inexact = DAG.getNode(ISD::SUB, DL, ShiftAmtTy, Bits, C1);
3947     if (!isConstantOrConstantVector(Inexact))
3948       return SDValue();
3949
3950     // Splat the sign bit into the register
3951     SDValue Sign = DAG.getNode(ISD::SRA, DL, VT, N0,
3952                                DAG.getConstant(BitWidth - 1, DL, ShiftAmtTy));
3953     AddToWorklist(Sign.getNode());
3954
3955     // Add (N0 < 0) ? abs2 - 1 : 0;
3956     SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, Sign, Inexact);
3957     AddToWorklist(Srl.getNode());
3958     SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Srl);
3959     AddToWorklist(Add.getNode());
3960     SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, Add, C1);
3961     AddToWorklist(Sra.getNode());
3962
3963     // Special case: (sdiv X, 1) -> X
3964     // Special Case: (sdiv X, -1) -> 0-X
3965     SDValue One = DAG.getConstant(1, DL, VT);
3966     SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
3967     SDValue IsOne = DAG.getSetCC(DL, CCVT, N1, One, ISD::SETEQ);
3968     SDValue IsAllOnes = DAG.getSetCC(DL, CCVT, N1, AllOnes, ISD::SETEQ);
3969     SDValue IsOneOrAllOnes = DAG.getNode(ISD::OR, DL, CCVT, IsOne, IsAllOnes);
3970     Sra = DAG.getSelect(DL, VT, IsOneOrAllOnes, N0, Sra);
3971
3972     // If dividing by a positive value, we're done. Otherwise, the result must
3973     // be negated.
3974     SDValue Zero = DAG.getConstant(0, DL, VT);
3975     SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, Zero, Sra);
3976
3977     // FIXME: Use SELECT_CC once we improve SELECT_CC constant-folding.
3978     SDValue IsNeg = DAG.getSetCC(DL, CCVT, N1, Zero, ISD::SETLT);
3979     SDValue Res = DAG.getSelect(DL, VT, IsNeg, Sub, Sra);
3980     return Res;
3981   }
3982
3983   // If integer divide is expensive and we satisfy the requirements, emit an
3984   // alternate sequence.  Targets may check function attributes for size/speed
3985   // trade-offs.
3986   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
3987   if (isConstantOrConstantVector(N1) &&
3988       !TLI.isIntDivCheap(N->getValueType(0), Attr))
3989     if (SDValue Op = BuildSDIV(N))
3990       return Op;
3991
3992   return SDValue();
3993 }
3994
3995 SDValue DAGCombiner::visitUDIV(SDNode *N) {
3996   SDValue N0 = N->getOperand(0);
3997   SDValue N1 = N->getOperand(1);
3998   EVT VT = N->getValueType(0);
3999   EVT CCVT = getSetCCResultType(VT);
4000
4001   // fold vector ops
4002   if (VT.isVector())
4003     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4004       return FoldedVOp;
4005
4006   SDLoc DL(N);
4007
4008   // fold (udiv c1, c2) -> c1/c2
4009   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4010   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4011   if (N0C && N1C)
4012     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
4013                                                     N0C, N1C))
4014       return Folded;
4015   // fold (udiv X, -1) -> select(X == -1, 1, 0)
4016   if (N1C && N1C->getAPIntValue().isAllOnesValue())
4017     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4018                          DAG.getConstant(1, DL, VT),
4019                          DAG.getConstant(0, DL, VT));
4020
4021   if (SDValue V = simplifyDivRem(N, DAG))
4022     return V;
4023
4024   if (SDValue NewSel = foldBinOpIntoSelect(N))
4025     return NewSel;
4026
4027   if (SDValue V = visitUDIVLike(N0, N1, N)) {
4028     // If the corresponding remainder node exists, update its users with
4029     // (Dividend - (Quotient * Divisor).
4030     if (SDNode *RemNode = DAG.getNodeIfExists(ISD::UREM, N->getVTList(),
4031                                               { N0, N1 })) {
4032       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, V, N1);
4033       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4034       AddToWorklist(Mul.getNode());
4035       AddToWorklist(Sub.getNode());
4036       CombineTo(RemNode, Sub);
4037     }
4038     return V;
4039   }
4040
4041   // sdiv, srem -> sdivrem
4042   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
4043   // true.  Otherwise, we break the simplification logic in visitREM().
4044   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4045   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
4046     if (SDValue DivRem = useDivRem(N))
4047         return DivRem;
4048
4049   return SDValue();
4050 }
4051
4052 SDValue DAGCombiner::visitUDIVLike(SDValue N0, SDValue N1, SDNode *N) {
4053   SDLoc DL(N);
4054   EVT VT = N->getValueType(0);
4055
4056   // fold (udiv x, (1 << c)) -> x >>u c
4057   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4058       DAG.isKnownToBeAPowerOfTwo(N1)) {
4059     SDValue LogBase2 = BuildLogBase2(N1, DL);
4060     AddToWorklist(LogBase2.getNode());
4061
4062     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4063     SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
4064     AddToWorklist(Trunc.getNode());
4065     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4066   }
4067
4068   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
4069   if (N1.getOpcode() == ISD::SHL) {
4070     SDValue N10 = N1.getOperand(0);
4071     if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
4072         DAG.isKnownToBeAPowerOfTwo(N10)) {
4073       SDValue LogBase2 = BuildLogBase2(N10, DL);
4074       AddToWorklist(LogBase2.getNode());
4075
4076       EVT ADDVT = N1.getOperand(1).getValueType();
4077       SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
4078       AddToWorklist(Trunc.getNode());
4079       SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
4080       AddToWorklist(Add.getNode());
4081       return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
4082     }
4083   }
4084
4085   // fold (udiv x, c) -> alternate
4086   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4087   if (isConstantOrConstantVector(N1) &&
4088       !TLI.isIntDivCheap(N->getValueType(0), Attr))
4089     if (SDValue Op = BuildUDIV(N))
4090       return Op;
4091
4092   return SDValue();
4093 }
4094
4095 // handles ISD::SREM and ISD::UREM
4096 SDValue DAGCombiner::visitREM(SDNode *N) {
4097   unsigned Opcode = N->getOpcode();
4098   SDValue N0 = N->getOperand(0);
4099   SDValue N1 = N->getOperand(1);
4100   EVT VT = N->getValueType(0);
4101   EVT CCVT = getSetCCResultType(VT);
4102
4103   bool isSigned = (Opcode == ISD::SREM);
4104   SDLoc DL(N);
4105
4106   // fold (rem c1, c2) -> c1%c2
4107   ConstantSDNode *N0C = isConstOrConstSplat(N0);
4108   ConstantSDNode *N1C = isConstOrConstSplat(N1);
4109   if (N0C && N1C)
4110     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
4111       return Folded;
4112   // fold (urem X, -1) -> select(X == -1, 0, x)
4113   if (!isSigned && N1C && N1C->getAPIntValue().isAllOnesValue())
4114     return DAG.getSelect(DL, VT, DAG.getSetCC(DL, CCVT, N0, N1, ISD::SETEQ),
4115                          DAG.getConstant(0, DL, VT), N0);
4116
4117   if (SDValue V = simplifyDivRem(N, DAG))
4118     return V;
4119
4120   if (SDValue NewSel = foldBinOpIntoSelect(N))
4121     return NewSel;
4122
4123   if (isSigned) {
4124     // If we know the sign bits of both operands are zero, strength reduce to a
4125     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
4126     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
4127       return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
4128   } else {
4129     SDValue NegOne = DAG.getAllOnesConstant(DL, VT);
4130     if (DAG.isKnownToBeAPowerOfTwo(N1)) {
4131       // fold (urem x, pow2) -> (and x, pow2-1)
4132       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4133       AddToWorklist(Add.getNode());
4134       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4135     }
4136     if (N1.getOpcode() == ISD::SHL &&
4137         DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
4138       // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
4139       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N1, NegOne);
4140       AddToWorklist(Add.getNode());
4141       return DAG.getNode(ISD::AND, DL, VT, N0, Add);
4142     }
4143   }
4144
4145   AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
4146
4147   // If X/C can be simplified by the division-by-constant logic, lower
4148   // X%C to the equivalent of X-X/C*C.
4149   // Reuse the SDIVLike/UDIVLike combines - to avoid mangling nodes, the
4150   // speculative DIV must not cause a DIVREM conversion.  We guard against this
4151   // by skipping the simplification if isIntDivCheap().  When div is not cheap,
4152   // combine will not return a DIVREM.  Regardless, checking cheapness here
4153   // makes sense since the simplification results in fatter code.
4154   if (DAG.isKnownNeverZero(N1) && !TLI.isIntDivCheap(VT, Attr)) {
4155     SDValue OptimizedDiv =
4156         isSigned ? visitSDIVLike(N0, N1, N) : visitUDIVLike(N0, N1, N);
4157     if (OptimizedDiv.getNode()) {
4158       // If the equivalent Div node also exists, update its users.
4159       unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
4160       if (SDNode *DivNode = DAG.getNodeIfExists(DivOpcode, N->getVTList(),
4161                                                 { N0, N1 }))
4162         CombineTo(DivNode, OptimizedDiv);
4163       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
4164       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
4165       AddToWorklist(OptimizedDiv.getNode());
4166       AddToWorklist(Mul.getNode());
4167       return Sub;
4168     }
4169   }
4170
4171   // sdiv, srem -> sdivrem
4172   if (SDValue DivRem = useDivRem(N))
4173     return DivRem.getValue(1);
4174
4175   return SDValue();
4176 }
4177
4178 SDValue DAGCombiner::visitMULHS(SDNode *N) {
4179   SDValue N0 = N->getOperand(0);
4180   SDValue N1 = N->getOperand(1);
4181   EVT VT = N->getValueType(0);
4182   SDLoc DL(N);
4183
4184   if (VT.isVector()) {
4185     // fold (mulhs x, 0) -> 0
4186     // do not return N0/N1, because undef node may exist.
4187     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4188         ISD::isBuildVectorAllZeros(N1.getNode()))
4189       return DAG.getConstant(0, DL, VT);
4190   }
4191
4192   // fold (mulhs x, 0) -> 0
4193   if (isNullConstant(N1))
4194     return N1;
4195   // fold (mulhs x, 1) -> (sra x, size(x)-1)
4196   if (isOneConstant(N1))
4197     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
4198                        DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
4199                                        getShiftAmountTy(N0.getValueType())));
4200
4201   // fold (mulhs x, undef) -> 0
4202   if (N0.isUndef() || N1.isUndef())
4203     return DAG.getConstant(0, DL, VT);
4204
4205   // If the type twice as wide is legal, transform the mulhs to a wider multiply
4206   // plus a shift.
4207   if (VT.isSimple() && !VT.isVector()) {
4208     MVT Simple = VT.getSimpleVT();
4209     unsigned SimpleSize = Simple.getSizeInBits();
4210     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4211     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4212       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
4213       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
4214       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4215       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4216             DAG.getConstant(SimpleSize, DL,
4217                             getShiftAmountTy(N1.getValueType())));
4218       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4219     }
4220   }
4221
4222   return SDValue();
4223 }
4224
4225 SDValue DAGCombiner::visitMULHU(SDNode *N) {
4226   SDValue N0 = N->getOperand(0);
4227   SDValue N1 = N->getOperand(1);
4228   EVT VT = N->getValueType(0);
4229   SDLoc DL(N);
4230
4231   if (VT.isVector()) {
4232     // fold (mulhu x, 0) -> 0
4233     // do not return N0/N1, because undef node may exist.
4234     if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
4235         ISD::isBuildVectorAllZeros(N1.getNode()))
4236       return DAG.getConstant(0, DL, VT);
4237   }
4238
4239   // fold (mulhu x, 0) -> 0
4240   if (isNullConstant(N1))
4241     return N1;
4242   // fold (mulhu x, 1) -> 0
4243   if (isOneConstant(N1))
4244     return DAG.getConstant(0, DL, N0.getValueType());
4245   // fold (mulhu x, undef) -> 0
4246   if (N0.isUndef() || N1.isUndef())
4247     return DAG.getConstant(0, DL, VT);
4248
4249   // fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
4250   if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
4251       DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
4252     unsigned NumEltBits = VT.getScalarSizeInBits();
4253     SDValue LogBase2 = BuildLogBase2(N1, DL);
4254     SDValue SRLAmt = DAG.getNode(
4255         ISD::SUB, DL, VT, DAG.getConstant(NumEltBits, DL, VT), LogBase2);
4256     EVT ShiftVT = getShiftAmountTy(N0.getValueType());
4257     SDValue Trunc = DAG.getZExtOrTrunc(SRLAmt, DL, ShiftVT);
4258     return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
4259   }
4260
4261   // If the type twice as wide is legal, transform the mulhu to a wider multiply
4262   // plus a shift.
4263   if (VT.isSimple() && !VT.isVector()) {
4264     MVT Simple = VT.getSimpleVT();
4265     unsigned SimpleSize = Simple.getSizeInBits();
4266     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4267     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4268       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
4269       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
4270       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
4271       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
4272             DAG.getConstant(SimpleSize, DL,
4273                             getShiftAmountTy(N1.getValueType())));
4274       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
4275     }
4276   }
4277
4278   return SDValue();
4279 }
4280
4281 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
4282 /// give the opcodes for the two computations that are being performed. Return
4283 /// true if a simplification was made.
4284 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
4285                                                 unsigned HiOp) {
4286   // If the high half is not needed, just compute the low half.
4287   bool HiExists = N->hasAnyUseOfValue(1);
4288   if (!HiExists && (!LegalOperations ||
4289                     TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
4290     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4291     return CombineTo(N, Res, Res);
4292   }
4293
4294   // If the low half is not needed, just compute the high half.
4295   bool LoExists = N->hasAnyUseOfValue(0);
4296   if (!LoExists && (!LegalOperations ||
4297                     TLI.isOperationLegalOrCustom(HiOp, N->getValueType(1)))) {
4298     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4299     return CombineTo(N, Res, Res);
4300   }
4301
4302   // If both halves are used, return as it is.
4303   if (LoExists && HiExists)
4304     return SDValue();
4305
4306   // If the two computed results can be simplified separately, separate them.
4307   if (LoExists) {
4308     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
4309     AddToWorklist(Lo.getNode());
4310     SDValue LoOpt = combine(Lo.getNode());
4311     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
4312         (!LegalOperations ||
4313          TLI.isOperationLegalOrCustom(LoOpt.getOpcode(), LoOpt.getValueType())))
4314       return CombineTo(N, LoOpt, LoOpt);
4315   }
4316
4317   if (HiExists) {
4318     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
4319     AddToWorklist(Hi.getNode());
4320     SDValue HiOpt = combine(Hi.getNode());
4321     if (HiOpt.getNode() && HiOpt != Hi &&
4322         (!LegalOperations ||
4323          TLI.isOperationLegalOrCustom(HiOpt.getOpcode(), HiOpt.getValueType())))
4324       return CombineTo(N, HiOpt, HiOpt);
4325   }
4326
4327   return SDValue();
4328 }
4329
4330 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
4331   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
4332     return Res;
4333
4334   EVT VT = N->getValueType(0);
4335   SDLoc DL(N);
4336
4337   // If the type is twice as wide is legal, transform the mulhu to a wider
4338   // multiply plus a shift.
4339   if (VT.isSimple() && !VT.isVector()) {
4340     MVT Simple = VT.getSimpleVT();
4341     unsigned SimpleSize = Simple.getSizeInBits();
4342     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4343     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4344       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
4345       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
4346       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4347       // Compute the high part as N1.
4348       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4349             DAG.getConstant(SimpleSize, DL,
4350                             getShiftAmountTy(Lo.getValueType())));
4351       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4352       // Compute the low part as N0.
4353       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4354       return CombineTo(N, Lo, Hi);
4355     }
4356   }
4357
4358   return SDValue();
4359 }
4360
4361 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
4362   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
4363     return Res;
4364
4365   EVT VT = N->getValueType(0);
4366   SDLoc DL(N);
4367
4368   // (umul_lohi N0, 0) -> (0, 0)
4369   if (isNullConstant(N->getOperand(1))) {
4370     SDValue Zero = DAG.getConstant(0, DL, VT);
4371     return CombineTo(N, Zero, Zero);
4372   }
4373
4374   // (umul_lohi N0, 1) -> (N0, 0)
4375   if (isOneConstant(N->getOperand(1))) {
4376     SDValue Zero = DAG.getConstant(0, DL, VT);
4377     return CombineTo(N, N->getOperand(0), Zero);
4378   }
4379
4380   // If the type is twice as wide is legal, transform the mulhu to a wider
4381   // multiply plus a shift.
4382   if (VT.isSimple() && !VT.isVector()) {
4383     MVT Simple = VT.getSimpleVT();
4384     unsigned SimpleSize = Simple.getSizeInBits();
4385     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
4386     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
4387       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
4388       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
4389       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
4390       // Compute the high part as N1.
4391       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
4392             DAG.getConstant(SimpleSize, DL,
4393                             getShiftAmountTy(Lo.getValueType())));
4394       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
4395       // Compute the low part as N0.
4396       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
4397       return CombineTo(N, Lo, Hi);
4398     }
4399   }
4400
4401   return SDValue();
4402 }
4403
4404 SDValue DAGCombiner::visitMULO(SDNode *N) {
4405   bool IsSigned = (ISD::SMULO == N->getOpcode());
4406
4407   // (mulo x, 2) -> (addo x, x)
4408   if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
4409     if (C2->getAPIntValue() == 2)
4410       return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
4411                          N->getVTList(), N->getOperand(0), N->getOperand(0));
4412
4413   return SDValue();
4414 }
4415
4416 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
4417   SDValue N0 = N->getOperand(0);
4418   SDValue N1 = N->getOperand(1);
4419   EVT VT = N0.getValueType();
4420
4421   // fold vector ops
4422   if (VT.isVector())
4423     if (SDValue FoldedVOp = SimplifyVBinOp(N))
4424       return FoldedVOp;
4425
4426   // fold operation with constant operands.
4427   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
4428   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
4429   if (N0C && N1C)
4430     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
4431
4432   // canonicalize constant to RHS
4433   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
4434      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
4435     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
4436
4437   // Is sign bits are zero, flip between UMIN/UMAX and SMIN/SMAX.
4438   // Only do this if the current op isn't legal and the flipped is.
4439   unsigned Opcode = N->getOpcode();
4440   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4441   if (!TLI.isOperationLegal(Opcode, VT) &&
4442       (N0.isUndef() || DAG.SignBitIsZero(N0)) &&
4443       (N1.isUndef() || DAG.SignBitIsZero(N1))) {
4444     unsigned AltOpcode;
4445     switch (Opcode) {
4446     case ISD::SMIN: AltOpcode = ISD::UMIN; break;
4447     case ISD::SMAX: AltOpcode = ISD::UMAX; break;
4448     case ISD::UMIN: AltOpcode = ISD::SMIN; break;
4449     case ISD::UMAX: AltOpcode = ISD::SMAX; break;
4450     default: llvm_unreachable("Unknown MINMAX opcode");
4451     }
4452     if (TLI.isOperationLegal(AltOpcode, VT))
4453       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
4454   }
4455
4456   return SDValue();
4457 }
4458
4459 /// If this is a bitwise logic instruction and both operands have the same
4460 /// opcode, try to sink the other opcode after the logic instruction.
4461 SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
4462   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
4463   EVT VT = N0.getValueType();
4464   unsigned LogicOpcode = N->getOpcode();
4465   unsigned HandOpcode = N0.getOpcode();
4466   assert((LogicOpcode == ISD::AND || LogicOpcode == ISD::OR ||
4467           LogicOpcode == ISD::XOR) && "Expected logic opcode");
4468   assert(HandOpcode == N1.getOpcode() && "Bad input!");
4469
4470   // Bail early if none of these transforms apply.
4471   if (N0.getNumOperands() == 0)
4472     return SDValue();
4473
4474   // FIXME: We should check number of uses of the operands to not increase
4475   //        the instruction count for all transforms.
4476
4477   // Handle size-changing casts.
4478   SDValue X = N0.getOperand(0);
4479   SDValue Y = N1.getOperand(0);
4480   EVT XVT = X.getValueType();
4481   SDLoc DL(N);
4482   if (HandOpcode == ISD::ANY_EXTEND || HandOpcode == ISD::ZERO_EXTEND ||
4483       HandOpcode == ISD::SIGN_EXTEND) {
4484     // If both operands have other uses, this transform would create extra
4485     // instructions without eliminating anything.
4486     if (!N0.hasOneUse() && !N1.hasOneUse())
4487       return SDValue();
4488     // We need matching integer source types.
4489     if (XVT != Y.getValueType())
4490       return SDValue();
4491     // Don't create an illegal op during or after legalization. Don't ever
4492     // create an unsupported vector op.
4493     if ((VT.isVector() || LegalOperations) &&
4494         !TLI.isOperationLegalOrCustom(LogicOpcode, XVT))
4495       return SDValue();
4496     // Avoid infinite looping with PromoteIntBinOp.
4497     // TODO: Should we apply desirable/legal constraints to all opcodes?
4498     if (HandOpcode == ISD::ANY_EXTEND && LegalTypes &&
4499         !TLI.isTypeDesirableForOp(LogicOpcode, XVT))
4500       return SDValue();
4501     // logic_op (hand_op X), (hand_op Y) --> hand_op (logic_op X, Y)
4502     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4503     return DAG.getNode(HandOpcode, DL, VT, Logic);
4504   }
4505
4506   // logic_op (truncate x), (truncate y) --> truncate (logic_op x, y)
4507   if (HandOpcode == ISD::TRUNCATE) {
4508     // If both operands have other uses, this transform would create extra
4509     // instructions without eliminating anything.
4510     if (!N0.hasOneUse() && !N1.hasOneUse())
4511       return SDValue();
4512     // We need matching source types.
4513     if (XVT != Y.getValueType())
4514       return SDValue();
4515     // Don't create an illegal op during or after legalization.
4516     if (LegalOperations && !TLI.isOperationLegal(LogicOpcode, XVT))
4517       return SDValue();
4518     // Be extra careful sinking truncate. If it's free, there's no benefit in
4519     // widening a binop. Also, don't create a logic op on an illegal type.
4520     if (TLI.isZExtFree(VT, XVT) && TLI.isTruncateFree(XVT, VT))
4521       return SDValue();
4522     if (!TLI.isTypeLegal(XVT))
4523       return SDValue();
4524     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4525     return DAG.getNode(HandOpcode, DL, VT, Logic);
4526   }
4527
4528   // For binops SHL/SRL/SRA/AND:
4529   //   logic_op (OP x, z), (OP y, z) --> OP (logic_op x, y), z
4530   if ((HandOpcode == ISD::SHL || HandOpcode == ISD::SRL ||
4531        HandOpcode == ISD::SRA || HandOpcode == ISD::AND) &&
4532       N0.getOperand(1) == N1.getOperand(1)) {
4533     // If either operand has other uses, this transform is not an improvement.
4534     if (!N0.hasOneUse() || !N1.hasOneUse())
4535       return SDValue();
4536     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4537     return DAG.getNode(HandOpcode, DL, VT, Logic, N0.getOperand(1));
4538   }
4539
4540   // Unary ops: logic_op (bswap x), (bswap y) --> bswap (logic_op x, y)
4541   if (HandOpcode == ISD::BSWAP) {
4542     // If either operand has other uses, this transform is not an improvement.
4543     if (!N0.hasOneUse() || !N1.hasOneUse())
4544       return SDValue();
4545     SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4546     return DAG.getNode(HandOpcode, DL, VT, Logic);
4547   }
4548
4549   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
4550   // Only perform this optimization up until type legalization, before
4551   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
4552   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
4553   // we don't want to undo this promotion.
4554   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
4555   // on scalars.
4556   if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
4557        Level <= AfterLegalizeTypes) {
4558     // Input types must be integer and the same.
4559     if (XVT.isInteger() && XVT == Y.getValueType()) {
4560       SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
4561       return DAG.getNode(HandOpcode, DL, VT, Logic);
4562     }
4563   }
4564
4565   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
4566   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
4567   // If both shuffles use the same mask, and both shuffle within a single
4568   // vector, then it is worthwhile to move the swizzle after the operation.
4569   // The type-legalizer generates this pattern when loading illegal
4570   // vector types from memory. In many cases this allows additional shuffle
4571   // optimizations.
4572   // There are other cases where moving the shuffle after the xor/and/or
4573   // is profitable even if shuffles don't perform a swizzle.
4574   // If both shuffles use the same mask, and both shuffles have the same first
4575   // or second operand, then it might still be profitable to move the shuffle
4576   // after the xor/and/or operation.
4577   if (HandOpcode == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
4578     auto *SVN0 = cast<ShuffleVectorSDNode>(N0);
4579     auto *SVN1 = cast<ShuffleVectorSDNode>(N1);
4580     assert(X.getValueType() == Y.getValueType() &&
4581            "Inputs to shuffles are not the same type");
4582
4583     // Check that both shuffles use the same mask. The masks are known to be of
4584     // the same length because the result vector type is the same.
4585     // Check also that shuffles have only one use to avoid introducing extra
4586     // instructions.
4587     if (!SVN0->hasOneUse() || !SVN1->hasOneUse() ||
4588         !SVN0->getMask().equals(SVN1->getMask()))
4589       return SDValue();
4590
4591     // Don't try to fold this node if it requires introducing a
4592     // build vector of all zeros that might be illegal at this stage.
4593     SDValue ShOp = N0.getOperand(1);
4594     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4595       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4596
4597     // (logic_op (shuf (A, C), shuf (B, C))) --> shuf (logic_op (A, B), C)
4598     if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
4599       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT,
4600                                   N0.getOperand(0), N1.getOperand(0));
4601       return DAG.getVectorShuffle(VT, DL, Logic, ShOp, SVN0->getMask());
4602     }
4603
4604     // Don't try to fold this node if it requires introducing a
4605     // build vector of all zeros that might be illegal at this stage.
4606     ShOp = N0.getOperand(0);
4607     if (LogicOpcode == ISD::XOR && !ShOp.isUndef())
4608       ShOp = tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
4609
4610     // (logic_op (shuf (C, A), shuf (C, B))) --> shuf (C, logic_op (A, B))
4611     if (N0.getOperand(0) == N1.getOperand(0) && ShOp.getNode()) {
4612       SDValue Logic = DAG.getNode(LogicOpcode, DL, VT, N0.getOperand(1),
4613                                   N1.getOperand(1));
4614       return DAG.getVectorShuffle(VT, DL, ShOp, Logic, SVN0->getMask());
4615     }
4616   }
4617
4618   return SDValue();
4619 }
4620
4621 /// Try to make (and/or setcc (LL, LR), setcc (RL, RR)) more efficient.
4622 SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
4623                                        const SDLoc &DL) {
4624   SDValue LL, LR, RL, RR, N0CC, N1CC;
4625   if (!isSetCCEquivalent(N0, LL, LR, N0CC) ||
4626       !isSetCCEquivalent(N1, RL, RR, N1CC))
4627     return SDValue();
4628
4629   assert(N0.getValueType() == N1.getValueType() &&
4630          "Unexpected operand types for bitwise logic op");
4631   assert(LL.getValueType() == LR.getValueType() &&
4632          RL.getValueType() == RR.getValueType() &&
4633          "Unexpected operand types for setcc");
4634
4635   // If we're here post-legalization or the logic op type is not i1, the logic
4636   // op type must match a setcc result type. Also, all folds require new
4637   // operations on the left and right operands, so those types must match.
4638   EVT VT = N0.getValueType();
4639   EVT OpVT = LL.getValueType();
4640   if (LegalOperations || VT.getScalarType() != MVT::i1)
4641     if (VT != getSetCCResultType(OpVT))
4642       return SDValue();
4643   if (OpVT != RL.getValueType())
4644     return SDValue();
4645
4646   ISD::CondCode CC0 = cast<CondCodeSDNode>(N0CC)->get();
4647   ISD::CondCode CC1 = cast<CondCodeSDNode>(N1CC)->get();
4648   bool IsInteger = OpVT.isInteger();
4649   if (LR == RR && CC0 == CC1 && IsInteger) {
4650     bool IsZero = isNullOrNullSplat(LR);
4651     bool IsNeg1 = isAllOnesOrAllOnesSplat(LR);
4652
4653     // All bits clear?
4654     bool AndEqZero = IsAnd && CC1 == ISD::SETEQ && IsZero;
4655     // All sign bits clear?
4656     bool AndGtNeg1 = IsAnd && CC1 == ISD::SETGT && IsNeg1;
4657     // Any bits set?
4658     bool OrNeZero = !IsAnd && CC1 == ISD::SETNE && IsZero;
4659     // Any sign bits set?
4660     bool OrLtZero = !IsAnd && CC1 == ISD::SETLT && IsZero;
4661
4662     // (and (seteq X,  0), (seteq Y,  0)) --> (seteq (or X, Y),  0)
4663     // (and (setgt X, -1), (setgt Y, -1)) --> (setgt (or X, Y), -1)
4664     // (or  (setne X,  0), (setne Y,  0)) --> (setne (or X, Y),  0)
4665     // (or  (setlt X,  0), (setlt Y,  0)) --> (setlt (or X, Y),  0)
4666     if (AndEqZero || AndGtNeg1 || OrNeZero || OrLtZero) {
4667       SDValue Or = DAG.getNode(ISD::OR, SDLoc(N0), OpVT, LL, RL);
4668       AddToWorklist(Or.getNode());
4669       return DAG.getSetCC(DL, VT, Or, LR, CC1);
4670     }
4671
4672     // All bits set?
4673     bool AndEqNeg1 = IsAnd && CC1 == ISD::SETEQ && IsNeg1;
4674     // All sign bits set?
4675     bool AndLtZero = IsAnd && CC1 == ISD::SETLT && IsZero;
4676     // Any bits clear?
4677     bool OrNeNeg1 = !IsAnd && CC1 == ISD::SETNE && IsNeg1;
4678     // Any sign bits clear?
4679     bool OrGtNeg1 = !IsAnd && CC1 == ISD::SETGT && IsNeg1;
4680
4681     // (and (seteq X, -1), (seteq Y, -1)) --> (seteq (and X, Y), -1)
4682     // (and (setlt X,  0), (setlt Y,  0)) --> (setlt (and X, Y),  0)
4683     // (or  (setne X, -1), (setne Y, -1)) --> (setne (and X, Y), -1)
4684     // (or  (setgt X, -1), (setgt Y  -1)) --> (setgt (and X, Y), -1)
4685     if (AndEqNeg1 || AndLtZero || OrNeNeg1 || OrGtNeg1) {
4686       SDValue And = DAG.getNode(ISD::AND, SDLoc(N0), OpVT, LL, RL);
4687       AddToWorklist(And.getNode());
4688       return DAG.getSetCC(DL, VT, And, LR, CC1);
4689     }
4690   }
4691
4692   // TODO: What is the 'or' equivalent of this fold?
4693   // (and (setne X, 0), (setne X, -1)) --> (setuge (add X, 1), 2)
4694   if (IsAnd && LL == RL && CC0 == CC1 && OpVT.getScalarSizeInBits() > 1 &&
4695       IsInteger && CC0 == ISD::SETNE &&
4696       ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
4697        (isAllOnesConstant(LR) && isNullConstant(RR)))) {
4698     SDValue One = DAG.getConstant(1, DL, OpVT);
4699     SDValue Two = DAG.getConstant(2, DL, OpVT);
4700     SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), OpVT, LL, One);
4701     AddToWorklist(Add.getNode());
4702     return DAG.getSetCC(DL, VT, Add, Two, ISD::SETUGE);
4703   }
4704
4705   // Try more general transforms if the predicates match and the only user of
4706   // the compares is the 'and' or 'or'.
4707   if (IsInteger && TLI.convertSetCCLogicToBitwiseLogic(OpVT) && CC0 == CC1 &&
4708       N0.hasOneUse() && N1.hasOneUse()) {
4709     // and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
4710     // or  (setne A, B), (setne C, D) --> setne (or (xor A, B), (xor C, D)), 0
4711     if ((IsAnd && CC1 == ISD::SETEQ) || (!IsAnd && CC1 == ISD::SETNE)) {
4712       SDValue XorL = DAG.getNode(ISD::XOR, SDLoc(N0), OpVT, LL, LR);
4713       SDValue XorR = DAG.getNode(ISD::XOR, SDLoc(N1), OpVT, RL, RR);
4714       SDValue Or = DAG.getNode(ISD::OR, DL, OpVT, XorL, XorR);
4715       SDValue Zero = DAG.getConstant(0, DL, OpVT);
4716       return DAG.getSetCC(DL, VT, Or, Zero, CC1);
4717     }
4718
4719     // Turn compare of constants whose difference is 1 bit into add+and+setcc.
4720     // TODO - support non-uniform vector amounts.
4721     if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
4722       // Match a shared variable operand and 2 non-opaque constant operands.
4723       ConstantSDNode *C0 = isConstOrConstSplat(LR);
4724       ConstantSDNode *C1 = isConstOrConstSplat(RR);
4725       if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
4726         // Canonicalize larger constant as C0.
4727         if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
4728           std::swap(C0, C1);
4729
4730         // The difference of the constants must be a single bit.
4731         const APInt &C0Val = C0->getAPIntValue();
4732         const APInt &C1Val = C1->getAPIntValue();
4733         if ((C0Val - C1Val).isPowerOf2()) {
4734           // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
4735           // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
4736           SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
4737           SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
4738           SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
4739           SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
4740           SDValue Zero = DAG.getConstant(0, DL, OpVT);
4741           return DAG.getSetCC(DL, VT, And, Zero, CC0);
4742         }
4743       }
4744     }
4745   }
4746
4747   // Canonicalize equivalent operands to LL == RL.
4748   if (LL == RR && LR == RL) {
4749     CC1 = ISD::getSetCCSwappedOperands(CC1);
4750     std::swap(RL, RR);
4751   }
4752
4753   // (and (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4754   // (or  (setcc X, Y, CC0), (setcc X, Y, CC1)) --> (setcc X, Y, NewCC)
4755   if (LL == RL && LR == RR) {
4756     ISD::CondCode NewCC = IsAnd ? ISD::getSetCCAndOperation(CC0, CC1, IsInteger)
4757                                 : ISD::getSetCCOrOperation(CC0, CC1, IsInteger);
4758     if (NewCC != ISD::SETCC_INVALID &&
4759         (!LegalOperations ||
4760          (TLI.isCondCodeLegal(NewCC, LL.getSimpleValueType()) &&
4761           TLI.isOperationLegal(ISD::SETCC, OpVT))))
4762       return DAG.getSetCC(DL, VT, LL, LR, NewCC);
4763   }
4764
4765   return SDValue();
4766 }
4767
4768 /// This contains all DAGCombine rules which reduce two values combined by
4769 /// an And operation to a single value. This makes them reusable in the context
4770 /// of visitSELECT(). Rules involving constants are not included as
4771 /// visitSELECT() already handles those cases.
4772 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, SDNode *N) {
4773   EVT VT = N1.getValueType();
4774   SDLoc DL(N);
4775
4776   // fold (and x, undef) -> 0
4777   if (N0.isUndef() || N1.isUndef())
4778     return DAG.getConstant(0, DL, VT);
4779
4780   if (SDValue V = foldLogicOfSetCCs(true, N0, N1, DL))
4781     return V;
4782
4783   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
4784       VT.getSizeInBits() <= 64) {
4785     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4786       if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
4787         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
4788         // immediate for an add, but it is legal if its top c2 bits are set,
4789         // transform the ADD so the immediate doesn't need to be materialized
4790         // in a register.
4791         APInt ADDC = ADDI->getAPIntValue();
4792         APInt SRLC = SRLI->getAPIntValue();
4793         if (ADDC.getMinSignedBits() <= 64 &&
4794             SRLC.ult(VT.getSizeInBits()) &&
4795             !TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4796           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
4797                                              SRLC.getZExtValue());
4798           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
4799             ADDC |= Mask;
4800             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
4801               SDLoc DL0(N0);
4802               SDValue NewAdd =
4803                 DAG.getNode(ISD::ADD, DL0, VT,
4804                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
4805               CombineTo(N0.getNode(), NewAdd);
4806               // Return N so it doesn't get rechecked!
4807               return SDValue(N, 0);
4808             }
4809           }
4810         }
4811       }
4812     }
4813   }
4814
4815   // Reduce bit extract of low half of an integer to the narrower type.
4816   // (and (srl i64:x, K), KMask) ->
4817   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
4818   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
4819     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
4820       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
4821         unsigned Size = VT.getSizeInBits();
4822         const APInt &AndMask = CAnd->getAPIntValue();
4823         unsigned ShiftBits = CShift->getZExtValue();
4824
4825         // Bail out, this node will probably disappear anyway.
4826         if (ShiftBits == 0)
4827           return SDValue();
4828
4829         unsigned MaskBits = AndMask.countTrailingOnes();
4830         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
4831
4832         if (AndMask.isMask() &&
4833             // Required bits must not span the two halves of the integer and
4834             // must fit in the half size type.
4835             (ShiftBits + MaskBits <= Size / 2) &&
4836             TLI.isNarrowingProfitable(VT, HalfVT) &&
4837             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
4838             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
4839             TLI.isTruncateFree(VT, HalfVT) &&
4840             TLI.isZExtFree(HalfVT, VT)) {
4841           // The isNarrowingProfitable is to avoid regressions on PPC and
4842           // AArch64 which match a few 64-bit bit insert / bit extract patterns
4843           // on downstream users of this. Those patterns could probably be
4844           // extended to handle extensions mixed in.
4845
4846           SDValue SL(N0);
4847           assert(MaskBits <= Size);
4848
4849           // Extracting the highest bit of the low half.
4850           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
4851           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
4852                                       N0.getOperand(0));
4853
4854           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
4855           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
4856           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
4857           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
4858           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
4859         }
4860       }
4861     }
4862   }
4863
4864   return SDValue();
4865 }
4866
4867 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
4868                                    EVT LoadResultTy, EVT &ExtVT) {
4869   if (!AndC->getAPIntValue().isMask())
4870     return false;
4871
4872   unsigned ActiveBits = AndC->getAPIntValue().countTrailingOnes();
4873
4874   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
4875   EVT LoadedVT = LoadN->getMemoryVT();
4876
4877   if (ExtVT == LoadedVT &&
4878       (!LegalOperations ||
4879        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
4880     // ZEXTLOAD will match without needing to change the size of the value being
4881     // loaded.
4882     return true;
4883   }
4884
4885   // Do not change the width of a volatile load.
4886   if (LoadN->isVolatile())
4887     return false;
4888
4889   // Do not generate loads of non-round integer types since these can
4890   // be expensive (and would be wrong if the type is not byte sized).
4891   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
4892     return false;
4893
4894   if (LegalOperations &&
4895       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
4896     return false;
4897
4898   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
4899     return false;
4900
4901   return true;
4902 }
4903
4904 bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
4905                                     ISD::LoadExtType ExtType, EVT &MemVT,
4906                                     unsigned ShAmt) {
4907   if (!LDST)
4908     return false;
4909   // Only allow byte offsets.
4910   if (ShAmt % 8)
4911     return false;
4912
4913   // Do not generate loads of non-round integer types since these can
4914   // be expensive (and would be wrong if the type is not byte sized).
4915   if (!MemVT.isRound())
4916     return false;
4917
4918   // Don't change the width of a volatile load.
4919   if (LDST->isVolatile())
4920     return false;
4921
4922   // Verify that we are actually reducing a load width here.
4923   if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
4924     return false;
4925
4926   // Ensure that this isn't going to produce an unsupported unaligned access.
4927   if (ShAmt &&
4928       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
4929                               LDST->getAddressSpace(), ShAmt / 8,
4930                               LDST->getMemOperand()->getFlags()))
4931     return false;
4932
4933   // It's not possible to generate a constant of extended or untyped type.
4934   EVT PtrType = LDST->getBasePtr().getValueType();
4935   if (PtrType == MVT::Untyped || PtrType.isExtended())
4936     return false;
4937
4938   if (isa<LoadSDNode>(LDST)) {
4939     LoadSDNode *Load = cast<LoadSDNode>(LDST);
4940     // Don't transform one with multiple uses, this would require adding a new
4941     // load.
4942     if (!SDValue(Load, 0).hasOneUse())
4943       return false;
4944
4945     if (LegalOperations &&
4946         !TLI.isLoadExtLegal(ExtType, Load->getValueType(0), MemVT))
4947       return false;
4948
4949     // For the transform to be legal, the load must produce only two values
4950     // (the value loaded and the chain).  Don't transform a pre-increment
4951     // load, for example, which produces an extra value.  Otherwise the
4952     // transformation is not equivalent, and the downstream logic to replace
4953     // uses gets things wrong.
4954     if (Load->getNumValues() > 2)
4955       return false;
4956
4957     // If the load that we're shrinking is an extload and we're not just
4958     // discarding the extension we can't simply shrink the load. Bail.
4959     // TODO: It would be possible to merge the extensions in some cases.
4960     if (Load->getExtensionType() != ISD::NON_EXTLOAD &&
4961         Load->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4962       return false;
4963
4964     if (!TLI.shouldReduceLoadWidth(Load, ExtType, MemVT))
4965       return false;
4966   } else {
4967     assert(isa<StoreSDNode>(LDST) && "It is not a Load nor a Store SDNode");
4968     StoreSDNode *Store = cast<StoreSDNode>(LDST);
4969     // Can't write outside the original store
4970     if (Store->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits() + ShAmt)
4971       return false;
4972
4973     if (LegalOperations &&
4974         !TLI.isTruncStoreLegal(Store->getValue().getValueType(), MemVT))
4975       return false;
4976   }
4977   return true;
4978 }
4979
4980 bool DAGCombiner::SearchForAndLoads(SDNode *N,
4981                                     SmallVectorImpl<LoadSDNode*> &Loads,
4982                                     SmallPtrSetImpl<SDNode*> &NodesWithConsts,
4983                                     ConstantSDNode *Mask,
4984                                     SDNode *&NodeToMask) {
4985   // Recursively search for the operands, looking for loads which can be
4986   // narrowed.
4987   for (SDValue Op : N->op_values()) {
4988     if (Op.getValueType().isVector())
4989       return false;
4990
4991     // Some constants may need fixing up later if they are too large.
4992     if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
4993       if ((N->getOpcode() == ISD::OR || N->getOpcode() == ISD::XOR) &&
4994           (Mask->getAPIntValue() & C->getAPIntValue()) != C->getAPIntValue())
4995         NodesWithConsts.insert(N);
4996       continue;
4997     }
4998
4999     if (!Op.hasOneUse())
5000       return false;
5001
5002     switch(Op.getOpcode()) {
5003     case ISD::LOAD: {
5004       auto *Load = cast<LoadSDNode>(Op);
5005       EVT ExtVT;
5006       if (isAndLoadExtLoad(Mask, Load, Load->getValueType(0), ExtVT) &&
5007           isLegalNarrowLdSt(Load, ISD::ZEXTLOAD, ExtVT)) {
5008
5009         // ZEXTLOAD is already small enough.
5010         if (Load->getExtensionType() == ISD::ZEXTLOAD &&
5011             ExtVT.bitsGE(Load->getMemoryVT()))
5012           continue;
5013
5014         // Use LE to convert equal sized loads to zext.
5015         if (ExtVT.bitsLE(Load->getMemoryVT()))
5016           Loads.push_back(Load);
5017
5018         continue;
5019       }
5020       return false;
5021     }
5022     case ISD::ZERO_EXTEND:
5023     case ISD::AssertZext: {
5024       unsigned ActiveBits = Mask->getAPIntValue().countTrailingOnes();
5025       EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
5026       EVT VT = Op.getOpcode() == ISD::AssertZext ?
5027         cast<VTSDNode>(Op.getOperand(1))->getVT() :
5028         Op.getOperand(0).getValueType();
5029
5030       // We can accept extending nodes if the mask is wider or an equal
5031       // width to the original type.
5032       if (ExtVT.bitsGE(VT))
5033         continue;
5034       break;
5035     }
5036     case ISD::OR:
5037     case ISD::XOR:
5038     case ISD::AND:
5039       if (!SearchForAndLoads(Op.getNode(), Loads, NodesWithConsts, Mask,
5040                              NodeToMask))
5041         return false;
5042       continue;
5043     }
5044
5045     // Allow one node which will masked along with any loads found.
5046     if (NodeToMask)
5047       return false;
5048
5049     // Also ensure that the node to be masked only produces one data result.
5050     NodeToMask = Op.getNode();
5051     if (NodeToMask->getNumValues() > 1) {
5052       bool HasValue = false;
5053       for (unsigned i = 0, e = NodeToMask->getNumValues(); i < e; ++i) {
5054         MVT VT = SDValue(NodeToMask, i).getSimpleValueType();
5055         if (VT != MVT::Glue && VT != MVT::Other) {
5056           if (HasValue) {
5057             NodeToMask = nullptr;
5058             return false;
5059           }
5060           HasValue = true;
5061         }
5062       }
5063       assert(HasValue && "Node to be masked has no data result?");
5064     }
5065   }
5066   return true;
5067 }
5068
5069 bool DAGCombiner::BackwardsPropagateMask(SDNode *N, SelectionDAG &DAG) {
5070   auto *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));
5071   if (!Mask)
5072     return false;
5073
5074   if (!Mask->getAPIntValue().isMask())
5075     return false;
5076
5077   // No need to do anything if the and directly uses a load.
5078   if (isa<LoadSDNode>(N->getOperand(0)))
5079     return false;
5080
5081   SmallVector<LoadSDNode*, 8> Loads;
5082   SmallPtrSet<SDNode*, 2> NodesWithConsts;
5083   SDNode *FixupNode = nullptr;
5084   if (SearchForAndLoads(N, Loads, NodesWithConsts, Mask, FixupNode)) {
5085     if (Loads.size() == 0)
5086       return false;
5087
5088     LLVM_DEBUG(dbgs() << "Backwards propagate AND: "; N->dump());
5089     SDValue MaskOp = N->getOperand(1);
5090
5091     // If it exists, fixup the single node we allow in the tree that needs
5092     // masking.
5093     if (FixupNode) {
5094       LLVM_DEBUG(dbgs() << "First, need to fix up: "; FixupNode->dump());
5095       SDValue And = DAG.getNode(ISD::AND, SDLoc(FixupNode),
5096                                 FixupNode->getValueType(0),
5097                                 SDValue(FixupNode, 0), MaskOp);
5098       DAG.ReplaceAllUsesOfValueWith(SDValue(FixupNode, 0), And);
5099       if (And.getOpcode() == ISD ::AND)
5100         DAG.UpdateNodeOperands(And.getNode(), SDValue(FixupNode, 0), MaskOp);
5101     }
5102
5103     // Narrow any constants that need it.
5104     for (auto *LogicN : NodesWithConsts) {
5105       SDValue Op0 = LogicN->getOperand(0);
5106       SDValue Op1 = LogicN->getOperand(1);
5107
5108       if (isa<ConstantSDNode>(Op0))
5109           std::swap(Op0, Op1);
5110
5111       SDValue And = DAG.getNode(ISD::AND, SDLoc(Op1), Op1.getValueType(),
5112                                 Op1, MaskOp);
5113
5114       DAG.UpdateNodeOperands(LogicN, Op0, And);
5115     }
5116
5117     // Create narrow loads.
5118     for (auto *Load : Loads) {
5119       LLVM_DEBUG(dbgs() << "Propagate AND back to: "; Load->dump());
5120       SDValue And = DAG.getNode(ISD::AND, SDLoc(Load), Load->getValueType(0),
5121                                 SDValue(Load, 0), MaskOp);
5122       DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), And);
5123       if (And.getOpcode() == ISD ::AND)
5124         And = SDValue(
5125             DAG.UpdateNodeOperands(And.getNode(), SDValue(Load, 0), MaskOp), 0);
5126       SDValue NewLoad = ReduceLoadWidth(And.getNode());
5127       assert(NewLoad &&
5128              "Shouldn't be masking the load if it can't be narrowed");
5129       CombineTo(Load, NewLoad, NewLoad.getValue(1));
5130     }
5131     DAG.ReplaceAllUsesWith(N, N->getOperand(0).getNode());
5132     return true;
5133   }
5134   return false;
5135 }
5136
5137 // Unfold
5138 //    x &  (-1 'logical shift' y)
5139 // To
5140 //    (x 'opposite logical shift' y) 'logical shift' y
5141 // if it is better for performance.
5142 SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
5143   assert(N->getOpcode() == ISD::AND);
5144
5145   SDValue N0 = N->getOperand(0);
5146   SDValue N1 = N->getOperand(1);
5147
5148   // Do we actually prefer shifts over mask?
5149   if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
5150     return SDValue();
5151
5152   // Try to match  (-1 '[outer] logical shift' y)
5153   unsigned OuterShift;
5154   unsigned InnerShift; // The opposite direction to the OuterShift.
5155   SDValue Y;           // Shift amount.
5156   auto matchMask = [&OuterShift, &InnerShift, &Y](SDValue M) -> bool {
5157     if (!M.hasOneUse())
5158       return false;
5159     OuterShift = M->getOpcode();
5160     if (OuterShift == ISD::SHL)
5161       InnerShift = ISD::SRL;
5162     else if (OuterShift == ISD::SRL)
5163       InnerShift = ISD::SHL;
5164     else
5165       return false;
5166     if (!isAllOnesConstant(M->getOperand(0)))
5167       return false;
5168     Y = M->getOperand(1);
5169     return true;
5170   };
5171
5172   SDValue X;
5173   if (matchMask(N1))
5174     X = N0;
5175   else if (matchMask(N0))
5176     X = N1;
5177   else
5178     return SDValue();
5179
5180   SDLoc DL(N);
5181   EVT VT = N->getValueType(0);
5182
5183   //     tmp = x   'opposite logical shift' y
5184   SDValue T0 = DAG.getNode(InnerShift, DL, VT, X, Y);
5185   //     ret = tmp 'logical shift' y
5186   SDValue T1 = DAG.getNode(OuterShift, DL, VT, T0, Y);
5187
5188   return T1;
5189 }
5190
5191 /// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
5192 /// For a target with a bit test, this is expected to become test + set and save
5193 /// at least 1 instruction.
5194 static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
5195   assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
5196
5197   // This is probably not worthwhile without a supported type.
5198   EVT VT = And->getValueType(0);
5199   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5200   if (!TLI.isTypeLegal(VT))
5201     return SDValue();
5202
5203   // Look through an optional extension and find a 'not'.
5204   // TODO: Should we favor test+set even without the 'not' op?
5205   SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
5206   if (Not.getOpcode() == ISD::ANY_EXTEND)
5207     Not = Not.getOperand(0);
5208   if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
5209     return SDValue();
5210
5211   // Look though an optional truncation. The source operand may not be the same
5212   // type as the original 'and', but that is ok because we are masking off
5213   // everything but the low bit.
5214   SDValue Srl = Not.getOperand(0);
5215   if (Srl.getOpcode() == ISD::TRUNCATE)
5216     Srl = Srl.getOperand(0);
5217
5218   // Match a shift-right by constant.
5219   if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
5220       !isa<ConstantSDNode>(Srl.getOperand(1)))
5221     return SDValue();
5222
5223   // We might have looked through casts that make this transform invalid.
5224   // TODO: If the source type is wider than the result type, do the mask and
5225   //       compare in the source type.
5226   const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
5227   unsigned VTBitWidth = VT.getSizeInBits();
5228   if (ShiftAmt.uge(VTBitWidth))
5229     return SDValue();
5230
5231   // Turn this into a bit-test pattern using mask op + setcc:
5232   // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
5233   SDLoc DL(And);
5234   SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
5235   EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
5236   SDValue Mask = DAG.getConstant(
5237       APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
5238   SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
5239   SDValue Zero = DAG.getConstant(0, DL, VT);
5240   SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
5241   return DAG.getZExtOrTrunc(Setcc, DL, VT);
5242 }
5243
/// Try the AND-specific combines on \p N, in order, and stop at the first one
/// that applies.
///
/// Returns a replacement value when a fold produced one, SDValue(N, 0) when a
/// fold already updated the DAG itself (CombineTo, ReplaceAllUsesOfValueWith,
/// SimplifyDemandedBits, BackwardsPropagateMask), or an empty SDValue when no
/// fold matched.
5244 SDValue DAGCombiner::visitAND(SDNode *N) {
5245   SDValue N0 = N->getOperand(0);
5246   SDValue N1 = N->getOperand(1);
5247   EVT VT = N1.getValueType();
5248
5249   // x & x --> x
5250   if (N0 == N1)
5251     return N0;
5252
5253   // fold vector ops
5254   if (VT.isVector()) {
5255     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5256       return FoldedVOp;
5257
5258     // fold (and x, 0) -> 0, vector edition
5259     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5260       // do not return N0, because undef node may exist in N0
5261       return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
5262                              SDLoc(N), N0.getValueType());
5263     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5264       // do not return N1, because undef node may exist in N1
5265       return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
5266                              SDLoc(N), N1.getValueType());
5267
5268     // fold (and x, -1) -> x, vector edition
5269     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5270       return N1;
5271     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5272       return N0;
5273   }
5274
5275   // fold (and c1, c2) -> c1&c2
5276   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5277   ConstantSDNode *N1C = isConstOrConstSplat(N1);
5278   if (N0C && N1C && !N1C->isOpaque())
5279     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
5280   // canonicalize constant to RHS
5281   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5282       !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5283     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
5284   // fold (and x, -1) -> x
5285   if (isAllOnesConstant(N1))
5286     return N0;
5287   // if (and x, c) is known to be zero, return 0
5288   unsigned BitWidth = VT.getScalarSizeInBits();
5289   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
5290                                    APInt::getAllOnesValue(BitWidth)))
5291     return DAG.getConstant(0, SDLoc(N), VT);
5292
5293   if (SDValue NewSel = foldBinOpIntoSelect(N))
5294     return NewSel;
5295
5296   // reassociate and
5297   if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
5298     return RAND;
5299
5300   // Try to convert a constant mask AND into a shuffle clear mask.
5301   if (VT.isVector())
5302     if (SDValue Shuffle = XformToShuffleWithZero(N))
5303       return Shuffle;
5304
5305   // fold (and (or x, C), D) -> D if (C & D) == D
5306   auto MatchSubset = [](ConstantSDNode *LHS, ConstantSDNode *RHS) {
5307     return RHS->getAPIntValue().isSubsetOf(LHS->getAPIntValue());
5308   };
5309   if (N0.getOpcode() == ISD::OR &&
5310       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchSubset))
5311     return N1;
5312   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
5313   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
5314     SDValue N0Op0 = N0.getOperand(0);
5315     APInt Mask = ~N1C->getAPIntValue();
5316     Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
5317     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
5318       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
5319                                  N0.getValueType(), N0Op0);
5320
5321       // Replace uses of the AND with uses of the Zero extend node.
5322       CombineTo(N, Zext);
5323
5324       // We actually want to replace all uses of the any_extend with the
5325       // zero_extend, to avoid duplicating things.  This will later cause this
5326       // AND to be folded.
5327       CombineTo(N0.getNode(), Zext);
5328       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5329     }
5330   }
5331
5332   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
5333   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
5334   // already be zero by virtue of the width of the base type of the load.
5335   //
5336   // the 'X' node here can either be nothing or an extract_vector_elt to catch
5337   // more cases.
5338   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
5339        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
5340        N0.getOperand(0).getOpcode() == ISD::LOAD &&
5341        N0.getOperand(0).getResNo() == 0) ||
5342       (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
5343     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
5344                                          N0 : N0.getOperand(0) );
5345
5346     // Get the constant (if applicable) the zero'th operand is being ANDed with.
5347     // This can be a pure constant or a vector splat, in which case we treat the
5348     // vector as a scalar and use the splat value.
5349     APInt Constant = APInt::getNullValue(1);
5350     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
5351       Constant = C->getAPIntValue();
5352     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
5353       APInt SplatValue, SplatUndef;
5354       unsigned SplatBitSize;
5355       bool HasAnyUndefs;
5356       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
5357                                              SplatBitSize, HasAnyUndefs);
5358       if (IsSplat) {
5359         // Undef bits can contribute to a possible optimisation if set, so
5360         // set them.
5361         SplatValue |= SplatUndef;
5362
5363         // The splat value may be something like "0x00FFFFFF", which means 0 for
5364         // the first vector value and FF for the rest, repeating. We need a mask
5365         // that will apply equally to all members of the vector, so AND all the
5366         // lanes of the constant together.
5367         unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
5368
5369         // If the splat value has been compressed to a bitlength lower
5370         // than the size of the vector lane, we need to re-expand it to
5371         // the lane size.
5372         if (EltBitWidth > SplatBitSize)
5373           for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
5374                SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
5375             SplatValue |= SplatValue.shl(SplatBitSize);
5376
5377         // Only set 'Constant' if 'SplatBitSize' is a multiple of the lane width
5378         // 'EltBitWidth'. Otherwise, we could propagate a wrong value.
5379         if ((SplatBitSize % EltBitWidth) == 0) {
5380           Constant = APInt::getAllOnesValue(EltBitWidth);
5381           for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
5382             Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
5383         }
5384       }
5385     }
5386
5387     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
5388     // actually legal and isn't going to get expanded, else this is a false
5389     // optimisation.
5390     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
5391                                                     Load->getValueType(0),
5392                                                     Load->getMemoryVT());
5393
5394     // Resize the constant to the same size as the original memory access before
5395     // extension. If it is still the AllOnesValue then this AND is completely
5396     // unneeded.
5397     Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
5398
         // B records whether the load's extension kind allows dropping the AND;
         // an EXTLOAD qualifies only if it can be turned into a legal ZEXTLOAD.
5399     bool B;
5400     switch (Load->getExtensionType()) {
5401     default: B = false; break;
5402     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
5403     case ISD::ZEXTLOAD:
5404     case ISD::NON_EXTLOAD: B = true; break;
5405     }
5406
5407     if (B && Constant.isAllOnesValue()) {
5408       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
5409       // preserve semantics once we get rid of the AND.
5410       SDValue NewLoad(Load, 0);
5411
5412       // Fold the AND away. NewLoad may get replaced immediately.
5413       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
5414
5415       if (Load->getExtensionType() == ISD::EXTLOAD) {
5416         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
5417                               Load->getValueType(0), SDLoc(Load),
5418                               Load->getChain(), Load->getBasePtr(),
5419                               Load->getOffset(), Load->getMemoryVT(),
5420                               Load->getMemOperand());
5421         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
5422         if (Load->getNumValues() == 3) {
5423           // PRE/POST_INC loads have 3 values.
5424           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
5425                            NewLoad.getValue(2) };
5426           CombineTo(Load, To, 3, true);
5427         } else {
5428           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
5429         }
5430       }
5431
5432       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5433     }
5434   }
5435
5436   // fold (and (load x), 255) -> (zextload x, i8)
5437   // fold (and (extload x, i16), 255) -> (zextload x, i8)
5438   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
5439   if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
5440                                 (N0.getOpcode() == ISD::ANY_EXTEND &&
5441                                  N0.getOperand(0).getOpcode() == ISD::LOAD))) {
5442     if (SDValue Res = ReduceLoadWidth(N)) {
5443       LoadSDNode *LN0 = N0->getOpcode() == ISD::ANY_EXTEND
5444         ? cast<LoadSDNode>(N0.getOperand(0)) : cast<LoadSDNode>(N0);
5445       AddToWorklist(N);
5446       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 0), Res);
5447       return SDValue(N, 0);
5448     }
5449   }
5450
5451   if (Level >= AfterLegalizeTypes) {
5452     // Attempt to propagate the AND back up to the leaves which, if they're
5453     // loads, can be combined to narrow loads and the AND node can be removed.
5454     // Perform after legalization so that extend nodes will already be
5455     // combined into the loads.
5456     if (BackwardsPropagateMask(N, DAG)) {
5457       return SDValue(N, 0);
5458     }
5459   }
5460
5461   if (SDValue Combined = visitANDLike(N0, N1, N))
5462     return Combined;
5463
5464   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
5465   if (N0.getOpcode() == N1.getOpcode())
5466     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
5467       return V;
5468
5469   // Masking the negated extension of a boolean is just the zero-extended
5470   // boolean:
5471   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
5472   // and (sub 0, sext(bool X)), 1 --> zext(bool X)
5473   //
5474   // Note: the SimplifyDemandedBits fold below can make an information-losing
5475   // transform, and then we have no way to find this better fold.
5476   if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
5477     if (isNullOrNullSplat(N0.getOperand(0))) {
5478       SDValue SubRHS = N0.getOperand(1);
5479       if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
5480           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5481         return SubRHS;
5482       if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
5483           SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
5484         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
5485     }
5486   }
5487
5488   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
5489   // fold (and (sra)) -> (and (srl)) when possible.
5490   if (SimplifyDemandedBits(SDValue(N, 0)))
5491     return SDValue(N, 0);
5492
5493   // fold (zext_inreg (extload x)) -> (zextload x)
5494   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
5495   if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
5496       (ISD::isEXTLoad(N0.getNode()) ||
5497        (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
5498     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
5499     EVT MemVT = LN0->getMemoryVT();
5500     // If we zero all the possible extended bits, then we can turn this into
5501     // a zextload if we are running before legalize or the operation is legal.
5502     unsigned ExtBitSize = N1.getScalarValueSizeInBits();
5503     unsigned MemBitSize = MemVT.getScalarSizeInBits();
5504     APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
5505     if (DAG.MaskedValueIsZero(N1, ExtBits) &&
5506         ((!LegalOperations && !LN0->isVolatile()) ||
5507          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
5508       SDValue ExtLoad =
5509           DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
5510                          LN0->getBasePtr(), MemVT, LN0->getMemOperand());
5511       AddToWorklist(N);
5512       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
5513       return SDValue(N, 0); // Return N so it doesn't get rechecked!
5514     }
5515   }
5516
5517   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
5518   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
5519     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
5520                                            N0.getOperand(1), false))
5521       return BSwap;
5522   }
5523
5524   if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
5525     return Shifts;
5526
5527   if (TLI.hasBitTest(N0, N1))
5528     if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
5529       return V;
5530
5531   return SDValue();
5532 }
5533
5534 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
5535 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
5536                                         bool DemandHighBits) {
5537   if (!LegalOperations)
5538     return SDValue();
5539
5540   EVT VT = N->getValueType(0);
5541   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
5542     return SDValue();
5543   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5544     return SDValue();
5545
5546   // Recognize (and (shl a, 8), 0xff00), (and (srl a, 8), 0xff)
5547   bool LookPassAnd0 = false;
5548   bool LookPassAnd1 = false;
5549   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
5550       std::swap(N0, N1);
5551   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
5552       std::swap(N0, N1);
5553   if (N0.getOpcode() == ISD::AND) {
5554     if (!N0.getNode()->hasOneUse())
5555       return SDValue();
5556     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5557     // Also handle 0xffff since the LHS is guaranteed to have zeros there.
5558     // This is needed for X86.
5559     if (!N01C || (N01C->getZExtValue() != 0xFF00 &&
5560                   N01C->getZExtValue() != 0xFFFF))
5561       return SDValue();
5562     N0 = N0.getOperand(0);
5563     LookPassAnd0 = true;
5564   }
5565
5566   if (N1.getOpcode() == ISD::AND) {
5567     if (!N1.getNode()->hasOneUse())
5568       return SDValue();
5569     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5570     if (!N11C || N11C->getZExtValue() != 0xFF)
5571       return SDValue();
5572     N1 = N1.getOperand(0);
5573     LookPassAnd1 = true;
5574   }
5575
5576   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
5577     std::swap(N0, N1);
5578   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
5579     return SDValue();
5580   if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
5581     return SDValue();
5582
5583   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5584   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
5585   if (!N01C || !N11C)
5586     return SDValue();
5587   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
5588     return SDValue();
5589
5590   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
5591   SDValue N00 = N0->getOperand(0);
5592   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
5593     if (!N00.getNode()->hasOneUse())
5594       return SDValue();
5595     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
5596     if (!N001C || N001C->getZExtValue() != 0xFF)
5597       return SDValue();
5598     N00 = N00.getOperand(0);
5599     LookPassAnd0 = true;
5600   }
5601
5602   SDValue N10 = N1->getOperand(0);
5603   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
5604     if (!N10.getNode()->hasOneUse())
5605       return SDValue();
5606     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
5607     // Also allow 0xFFFF since the bits will be shifted out. This is needed
5608     // for X86.
5609     if (!N101C || (N101C->getZExtValue() != 0xFF00 &&
5610                    N101C->getZExtValue() != 0xFFFF))
5611       return SDValue();
5612     N10 = N10.getOperand(0);
5613     LookPassAnd1 = true;
5614   }
5615
5616   if (N00 != N10)
5617     return SDValue();
5618
5619   // Make sure everything beyond the low halfword gets set to zero since the SRL
5620   // 16 will clear the top bits.
5621   unsigned OpSizeInBits = VT.getSizeInBits();
5622   if (DemandHighBits && OpSizeInBits > 16) {
5623     // If the left-shift isn't masked out then the only way this is a bswap is
5624     // if all bits beyond the low 8 are 0. In that case the entire pattern
5625     // reduces to a left shift anyway: leave it for other parts of the combiner.
5626     if (!LookPassAnd0)
5627       return SDValue();
5628
5629     // However, if the right shift isn't masked out then it might be because
5630     // it's not needed. See if we can spot that too.
5631     if (!LookPassAnd1 &&
5632         !DAG.MaskedValueIsZero(
5633             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
5634       return SDValue();
5635   }
5636
5637   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
5638   if (OpSizeInBits > 16) {
5639     SDLoc DL(N);
5640     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
5641                       DAG.getConstant(OpSizeInBits - 16, DL,
5642                                       getShiftAmountTy(VT)));
5643   }
5644   return Res;
5645 }
5646
5647 /// Return true if the specified node is an element that makes up a 32-bit
5648 /// packed halfword byteswap.
5649 /// ((x & 0x000000ff) << 8) |
5650 /// ((x & 0x0000ff00) >> 8) |
5651 /// ((x & 0x00ff0000) << 8) |
5652 /// ((x & 0xff000000) >> 8)
5653 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
5654   if (!N.getNode()->hasOneUse())
5655     return false;
5656
5657   unsigned Opc = N.getOpcode();
5658   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
5659     return false;
5660
5661   SDValue N0 = N.getOperand(0);
5662   unsigned Opc0 = N0.getOpcode();
5663   if (Opc0 != ISD::AND && Opc0 != ISD::SHL && Opc0 != ISD::SRL)
5664     return false;
5665
5666   ConstantSDNode *N1C = nullptr;
5667   // SHL or SRL: look upstream for AND mask operand
5668   if (Opc == ISD::AND)
5669     N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5670   else if (Opc0 == ISD::AND)
5671     N1C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5672   if (!N1C)
5673     return false;
5674
5675   unsigned MaskByteOffset;
5676   switch (N1C->getZExtValue()) {
5677   default:
5678     return false;
5679   case 0xFF:       MaskByteOffset = 0; break;
5680   case 0xFF00:     MaskByteOffset = 1; break;
5681   case 0xFFFF:
5682     // In case demanded bits didn't clear the bits that will be shifted out.
5683     // This is needed for X86.
5684     if (Opc == ISD::SRL || (Opc == ISD::AND && Opc0 == ISD::SHL)) {
5685       MaskByteOffset = 1;
5686       break;
5687     }
5688     return false;
5689   case 0xFF0000:   MaskByteOffset = 2; break;
5690   case 0xFF000000: MaskByteOffset = 3; break;
5691   }
5692
5693   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
5694   if (Opc == ISD::AND) {
5695     if (MaskByteOffset == 0 || MaskByteOffset == 2) {
5696       // (x >> 8) & 0xff
5697       // (x >> 8) & 0xff0000
5698       if (Opc0 != ISD::SRL)
5699         return false;
5700       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5701       if (!C || C->getZExtValue() != 8)
5702         return false;
5703     } else {
5704       // (x << 8) & 0xff00
5705       // (x << 8) & 0xff000000
5706       if (Opc0 != ISD::SHL)
5707         return false;
5708       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
5709       if (!C || C->getZExtValue() != 8)
5710         return false;
5711     }
5712   } else if (Opc == ISD::SHL) {
5713     // (x & 0xff) << 8
5714     // (x & 0xff0000) << 8
5715     if (MaskByteOffset != 0 && MaskByteOffset != 2)
5716       return false;
5717     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5718     if (!C || C->getZExtValue() != 8)
5719       return false;
5720   } else { // Opc == ISD::SRL
5721     // (x & 0xff00) >> 8
5722     // (x & 0xff000000) >> 8
5723     if (MaskByteOffset != 1 && MaskByteOffset != 3)
5724       return false;
5725     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
5726     if (!C || C->getZExtValue() != 8)
5727       return false;
5728   }
5729
5730   if (Parts[MaskByteOffset])
5731     return false;
5732
5733   Parts[MaskByteOffset] = N0.getOperand(0).getNode();
5734   return true;
5735 }
5736
5737 /// Match a 32-bit packed halfword bswap. That is
5738 /// ((x & 0x000000ff) << 8) |
5739 /// ((x & 0x0000ff00) >> 8) |
5740 /// ((x & 0x00ff0000) << 8) |
5741 /// ((x & 0xff000000) >> 8)
5742 /// => (rotl (bswap x), 16)
5743 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
5744   if (!LegalOperations)
5745     return SDValue();
5746
5747   EVT VT = N->getValueType(0);
5748   if (VT != MVT::i32)
5749     return SDValue();
5750   if (!TLI.isOperationLegalOrCustom(ISD::BSWAP, VT))
5751     return SDValue();
5752
5753   // Look for either
5754   // (or (or (and), (and)), (or (and), (and)))
5755   // (or (or (or (and), (and)), (and)), (and))
5756   if (N0.getOpcode() != ISD::OR)
5757     return SDValue();
5758   SDValue N00 = N0.getOperand(0);
5759   SDValue N01 = N0.getOperand(1);
5760   SDNode *Parts[4] = {};
5761
5762   if (N1.getOpcode() == ISD::OR &&
5763       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
5764     // (or (or (and), (and)), (or (and), (and)))
5765     if (!isBSwapHWordElement(N00, Parts))
5766       return SDValue();
5767
5768     if (!isBSwapHWordElement(N01, Parts))
5769       return SDValue();
5770     SDValue N10 = N1.getOperand(0);
5771     if (!isBSwapHWordElement(N10, Parts))
5772       return SDValue();
5773     SDValue N11 = N1.getOperand(1);
5774     if (!isBSwapHWordElement(N11, Parts))
5775       return SDValue();
5776   } else {
5777     // (or (or (or (and), (and)), (and)), (and))
5778     if (!isBSwapHWordElement(N1, Parts))
5779       return SDValue();
5780     if (!isBSwapHWordElement(N01, Parts))
5781       return SDValue();
5782     if (N00.getOpcode() != ISD::OR)
5783       return SDValue();
5784     SDValue N000 = N00.getOperand(0);
5785     if (!isBSwapHWordElement(N000, Parts))
5786       return SDValue();
5787     SDValue N001 = N00.getOperand(1);
5788     if (!isBSwapHWordElement(N001, Parts))
5789       return SDValue();
5790   }
5791
5792   // Make sure the parts are all coming from the same node.
5793   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
5794     return SDValue();
5795
5796   SDLoc DL(N);
5797   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
5798                               SDValue(Parts[0], 0));
5799
5800   // Result of the bswap should be rotated by 16. If it's not legal, then
5801   // do  (x << 16) | (x >> 16).
5802   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
5803   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
5804     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
5805   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
5806     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
5807   return DAG.getNode(ISD::OR, DL, VT,
5808                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
5809                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
5810 }
5811
5812 /// This contains all DAGCombine rules which reduce two values combined by
5813 /// an Or operation to a single value \see visitANDLike().
5814 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
5815   EVT VT = N1.getValueType();
5816   SDLoc DL(N);
5817
5818   // fold (or x, undef) -> -1
5819   if (!LegalOperations && (N0.isUndef() || N1.isUndef()))
5820     return DAG.getAllOnesConstant(DL, VT);
5821
5822   if (SDValue V = foldLogicOfSetCCs(false, N0, N1, DL))
5823     return V;
5824
5825   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
5826   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
5827       // Don't increase # computations.
5828       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5829     // We can only do this xform if we know that bits from X that are set in C2
5830     // but not in C1 are already zero.  Likewise for Y.
5831     if (const ConstantSDNode *N0O1C =
5832         getAsNonOpaqueConstant(N0.getOperand(1))) {
5833       if (const ConstantSDNode *N1O1C =
5834           getAsNonOpaqueConstant(N1.getOperand(1))) {
5835         // We can only do this xform if we know that bits from X that are set in
5836         // C2 but not in C1 are already zero.  Likewise for Y.
5837         const APInt &LHSMask = N0O1C->getAPIntValue();
5838         const APInt &RHSMask = N1O1C->getAPIntValue();
5839
5840         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
5841             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
5842           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5843                                   N0.getOperand(0), N1.getOperand(0));
5844           return DAG.getNode(ISD::AND, DL, VT, X,
5845                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
5846         }
5847       }
5848     }
5849   }
5850
5851   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
5852   if (N0.getOpcode() == ISD::AND &&
5853       N1.getOpcode() == ISD::AND &&
5854       N0.getOperand(0) == N1.getOperand(0) &&
5855       // Don't increase # computations.
5856       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
5857     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
5858                             N0.getOperand(1), N1.getOperand(1));
5859     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), X);
5860   }
5861
5862   return SDValue();
5863 }
5864
5865 /// OR combines for which the commuted variant will be tried as well.
5866 static SDValue visitORCommutative(
5867     SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
5868   EVT VT = N0.getValueType();
5869   if (N0.getOpcode() == ISD::AND) {
5870     // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
5871     if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
5872       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
5873
5874     // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
5875     if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
5876       return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
5877   }
5878
5879   return SDValue();
5880 }
5881
5882 SDValue DAGCombiner::visitOR(SDNode *N) {
5883   SDValue N0 = N->getOperand(0);
5884   SDValue N1 = N->getOperand(1);
5885   EVT VT = N1.getValueType();
5886
5887   // x | x --> x
5888   if (N0 == N1)
5889     return N0;
5890
5891   // fold vector ops
5892   if (VT.isVector()) {
5893     if (SDValue FoldedVOp = SimplifyVBinOp(N))
5894       return FoldedVOp;
5895
5896     // fold (or x, 0) -> x, vector edition
5897     if (ISD::isBuildVectorAllZeros(N0.getNode()))
5898       return N1;
5899     if (ISD::isBuildVectorAllZeros(N1.getNode()))
5900       return N0;
5901
5902     // fold (or x, -1) -> -1, vector edition
5903     if (ISD::isBuildVectorAllOnes(N0.getNode()))
5904       // do not return N0, because undef node may exist in N0
5905       return DAG.getAllOnesConstant(SDLoc(N), N0.getValueType());
5906     if (ISD::isBuildVectorAllOnes(N1.getNode()))
5907       // do not return N1, because undef node may exist in N1
5908       return DAG.getAllOnesConstant(SDLoc(N), N1.getValueType());
5909
5910     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
5911     // Do this only if the resulting shuffle is legal.
5912     if (isa<ShuffleVectorSDNode>(N0) &&
5913         isa<ShuffleVectorSDNode>(N1) &&
5914         // Avoid folding a node with illegal type.
5915         TLI.isTypeLegal(VT)) {
5916       bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
5917       bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
5918       bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
5919       bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
5920       // Ensure both shuffles have a zero input.
5921       if ((ZeroN00 != ZeroN01) && (ZeroN10 != ZeroN11)) {
5922         assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
5923         assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
5924         const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
5925         const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
5926         bool CanFold = true;
5927         int NumElts = VT.getVectorNumElements();
5928         SmallVector<int, 4> Mask(NumElts);
5929
5930         for (int i = 0; i != NumElts; ++i) {
5931           int M0 = SV0->getMaskElt(i);
5932           int M1 = SV1->getMaskElt(i);
5933
5934           // Determine if either index is pointing to a zero vector.
5935           bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
5936           bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));
5937
5938           // If one element is zero and the otherside is undef, keep undef.
5939           // This also handles the case that both are undef.
5940           if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
5941             Mask[i] = -1;
5942             continue;
5943           }
5944
5945           // Make sure only one of the elements is zero.
5946           if (M0Zero == M1Zero) {
5947             CanFold = false;
5948             break;
5949           }
5950
5951           assert((M0 >= 0 || M1 >= 0) && "Undef index!");
5952
5953           // We have a zero and non-zero element. If the non-zero came from
5954           // SV0 make the index a LHS index. If it came from SV1, make it
5955           // a RHS index. We need to mod by NumElts because we don't care
5956           // which operand it came from in the original shuffles.
5957           Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
5958         }
5959
5960         if (CanFold) {
5961           SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
5962           SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
5963
5964           SDValue LegalShuffle =
5965               TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
5966                                           Mask, DAG);
5967           if (LegalShuffle)
5968             return LegalShuffle;
5969         }
5970       }
5971     }
5972   }
5973
5974   // fold (or c1, c2) -> c1|c2
5975   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
5976   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
5977   if (N0C && N1C && !N1C->isOpaque())
5978     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
5979   // canonicalize constant to RHS
5980   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
5981      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
5982     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
5983   // fold (or x, 0) -> x
5984   if (isNullConstant(N1))
5985     return N0;
5986   // fold (or x, -1) -> -1
5987   if (isAllOnesConstant(N1))
5988     return N1;
5989
5990   if (SDValue NewSel = foldBinOpIntoSelect(N))
5991     return NewSel;
5992
5993   // fold (or x, c) -> c iff (x & ~c) == 0
5994   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
5995     return N1;
5996
5997   if (SDValue Combined = visitORLike(N0, N1, N))
5998     return Combined;
5999
6000   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
6001   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
6002     return BSwap;
6003   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
6004     return BSwap;
6005
6006   // reassociate or
6007   if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
6008     return ROR;
6009
6010   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
6011   // iff (c1 & c2) != 0 or c1/c2 are undef.
6012   auto MatchIntersect = [](ConstantSDNode *C1, ConstantSDNode *C2) {
6013     return !C1 || !C2 || C1->getAPIntValue().intersects(C2->getAPIntValue());
6014   };
6015   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
6016       ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchIntersect, true)) {
6017     if (SDValue COR = DAG.FoldConstantArithmetic(
6018             ISD::OR, SDLoc(N1), VT, N1.getNode(), N0.getOperand(1).getNode())) {
6019       SDValue IOR = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1);
6020       AddToWorklist(IOR.getNode());
6021       return DAG.getNode(ISD::AND, SDLoc(N), VT, COR, IOR);
6022     }
6023   }
6024
6025   if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
6026     return Combined;
6027   if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
6028     return Combined;
6029
6030   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
6031   if (N0.getOpcode() == N1.getOpcode())
6032     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
6033       return V;
6034
6035   // See if this is some rotate idiom.
6036   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
6037     return SDValue(Rot, 0);
6038
6039   if (SDValue Load = MatchLoadCombine(N))
6040     return Load;
6041
6042   // Simplify the operands using demanded-bits information.
6043   if (SimplifyDemandedBits(SDValue(N, 0)))
6044     return SDValue(N, 0);
6045
6046   // If OR can be rewritten into ADD, try combines based on ADD.
6047   if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
6048       DAG.haveNoCommonBitsSet(N0, N1))
6049     if (SDValue Combined = visitADDLike(N))
6050       return Combined;
6051
6052   return SDValue();
6053 }
6054
6055 static SDValue stripConstantMask(SelectionDAG &DAG, SDValue Op, SDValue &Mask) {
6056   // Peel (and X, C) for a constant C: C is reported via Mask and X returned.
6057   if (Op.getOpcode() != ISD::AND ||
6058       !DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1)))
6059     return Op; // Not (and X, C): nothing to strip, Mask stays untouched.
6060   Mask = Op.getOperand(1);
6061   return Op.getOperand(0);
6062 }
6063
6064 /// Recognize "(X shl/srl V1) & V2" (V2 optional); the shift goes to \p Shift.
6065 /// \p Mask is set whenever a constant AND is peeled, even if the match fails.
6066 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
6067                             SDValue &Mask) {
6068   SDValue Unmasked = stripConstantMask(DAG, Op, Mask);
6069   if (Unmasked.getOpcode() != ISD::SRL && Unmasked.getOpcode() != ISD::SHL)
6070     return false;
6071   Shift = Unmasked;
6072   return true;
6073 }
6074
6075 /// Helper for the rotate matching done on behalf of visitOR: extracts the
6076 /// needed side of a rotate idiom from a shl/srl/mul/udiv.  This is meant to
6077 /// handle cases where InstCombine merged some outside op with one of the
6078 /// shifts from the rotate pattern.  \p OppShift is the shl/srl already matched
6079 /// on the other side of the OR; \p Mask receives any constant AND mask that is
6080 /// stripped from \p ExtractFrom.  \returns An empty \c SDValue if the needed
6081 /// shift couldn't be extracted, else \p ExtractFrom expanded as follows:
6082 ///
6083 ///   (or (add v v) (shrl v bitwidth-1)):
6084 ///     expands (add v v) -> (shl v 1)
6085 ///
6086 ///   (or (mul v c0) (shrl (mul v c1) c2)):
6087 ///     expands (mul v c0) -> (shl (mul v c1) c3)
6088 ///
6089 ///   (or (udiv v c0) (shl (udiv v c1) c2)):
6090 ///     expands (udiv v c0) -> (shrl (udiv v c1) c3)
6091 ///
6092 ///   (or (shl v c0) (shrl (shl v c1) c2)):
6093 ///     expands (shl v c0) -> (shl (shl v c1) c3)
6094 ///
6095 ///   (or (shrl v c0) (shl (shrl v c1) c2)):
6096 ///     expands (shrl v c0) -> (shrl (shrl v c1) c3)
6097 ///
6098 /// Such that in all cases, c3+c2==bitwidth(op v c1).
6099 static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
6100                                      SDValue ExtractFrom, SDValue &Mask,
6101                                      const SDLoc &DL) {
6102   assert(OppShift && ExtractFrom && "Empty SDValue");
6103   assert(
6104       (OppShift.getOpcode() == ISD::SHL || OppShift.getOpcode() == ISD::SRL) &&
6105       "Existing shift must be valid as a rotate half");
6106
6107   ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask); // May set Mask.
6108
6109   // Value shifted by the existing shift, and the type of that value.
6110   SDValue OppShiftLHS = OppShift.getOperand(0);
6111   EVT ShiftedVT = OppShiftLHS.getValueType();
6112
6113   // Amount of the existing shift (null unless a constant or constant splat).
6114   ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
6115
6116   // (add v v) -> (shl v 1), when the existing shift is (srl v, bitwidth-1).
6117   if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
6118       ExtractFrom.getOpcode() == ISD::ADD &&
6119       ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
6120       ExtractFrom.getOperand(0) == OppShiftLHS &&
6121       OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
6122     return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
6123                        DAG.getShiftAmountConstant(1, ShiftedVT, DL));
6124
6125   // Preconditions:
6126   //    (or (op0 v c0) (shiftl/r (op0 v c1) c2))
6127   //
6128   // Find opcode of the needed shift to be extracted from (op0 v c0).
6129   unsigned Opcode = ISD::DELETED_NODE;
6130   bool IsMulOrDiv = false;
6131   // Set Opcode and IsMulOrDiv if the extract opcode matches the needed shift
6132   // opcode or its arithmetic (mul or udiv) variant.  Returns false otherwise.
6133   auto SelectOpcode = [&](unsigned NeededShift, unsigned MulOrDivVariant) {
6134     IsMulOrDiv = ExtractFrom.getOpcode() == MulOrDivVariant;
6135     if (!IsMulOrDiv && ExtractFrom.getOpcode() != NeededShift)
6136       return false;
6137     Opcode = NeededShift;
6138     return true;
6139   };
6140   // op0 must be either the needed shift opcode or the mul/udiv equivalent
6141   // that the needed shift can be extracted from: shl/mul opposite an srl,
6142   if ((OppShift.getOpcode() != ISD::SRL || !SelectOpcode(ISD::SHL, ISD::MUL)) &&
6143       (OppShift.getOpcode() != ISD::SHL || !SelectOpcode(ISD::SRL, ISD::UDIV)))
6144     return SDValue(); // ... srl/udiv opposite a shl; anything else fails.
6145
6146   // op0 must be the same opcode on both sides, have the same LHS argument,
6147   // and produce the same value type.
6148   if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
6149       OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
6150       ShiftedVT != ExtractFrom.getValueType())
6151     return SDValue();
6152
6153   // Constant mul/udiv/shift amount from the RHS of the shift's LHS op (c1).
6154   ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
6155   // Constant mul/udiv/shift amount from the RHS of the ExtractFrom op (c0).
6156   ConstantSDNode *ExtractFromCst =
6157       isConstOrConstSplat(ExtractFrom.getOperand(1));
6158   // TODO: We should be able to handle non-uniform constant vectors for these values
6159   // Check that we have constant, non-zero values for c2, c1 and c0.
6160   if (!OppShiftCst || !OppShiftCst->getAPIntValue() ||
6161       !OppLHSCst || !OppLHSCst->getAPIntValue() ||
6162       !ExtractFromCst || !ExtractFromCst->getAPIntValue())
6163     return SDValue();
6164
6165   // Compute the shift amount (c3) we need to extract to complete the rotate.
6166   const unsigned VTWidth = ShiftedVT.getScalarSizeInBits();
6167   if (OppShiftCst->getAPIntValue().ugt(VTWidth))
6168     return SDValue(); // c2 exceeds the bit width; c3 would wrap around.
6169   APInt NeededShiftAmt = VTWidth - OppShiftCst->getAPIntValue();
6170   // Normalize the bitwidth of the two mul/udiv/shift constant operands.
6171   APInt ExtractFromAmt = ExtractFromCst->getAPIntValue();
6172   APInt OppLHSAmt = OppLHSCst->getAPIntValue();
6173   zeroExtendToMatch(ExtractFromAmt, OppLHSAmt);
6174
6175   // Now try to extract the needed shift from the ExtractFrom op and see if the
6176   // result matches up with the existing shift's LHS op.
6177   if (IsMulOrDiv) {
6178     // Op to extract from is a mul or udiv by a constant.
6179     // Check (with c3 == bitwidth - c2, the needed shift amount):
6180     //     c0 / (1 << c3) == c1
6181     //     c0 % (1 << c3) == 0
6182     const APInt ExtractDiv = APInt::getOneBitSet(ExtractFromAmt.getBitWidth(),
6183                                                  NeededShiftAmt.getZExtValue());
6184     APInt ResultAmt;
6185     APInt Rem;
6186     APInt::udivrem(ExtractFromAmt, ExtractDiv, ResultAmt, Rem);
6187     if (Rem != 0 || ResultAmt != OppLHSAmt)
6188       return SDValue();
6189   } else {
6190     // Op to extract from is a shift by a constant.
6191     // Check:
6192     //      c0 - (bitwidth - c2) == c1
6193     if (OppLHSAmt != ExtractFromAmt - NeededShiftAmt.zextOrTrunc(
6194                                           ExtractFromAmt.getBitWidth()))
6195       return SDValue();
6196   }
6197
6198   // Return the expanded shift op that should allow a rotate to be formed.
6199   EVT ShiftVT = OppShift.getOperand(1).getValueType();
6200   EVT ResVT = ExtractFrom.getValueType();
6201   SDValue NewShiftNode = DAG.getConstant(NeededShiftAmt, DL, ShiftVT);
6202   return DAG.getNode(Opcode, DL, ResVT, OppShiftLHS, NewShiftNode);
6203 }
6204
6205 // Return true if we can prove that, whenever Neg and Pos are both in the
6206 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
6207 // for two opposing shifts shift1 and shift2 and a value X with EltSize bits:
6208 //
6209 //     (or (shift1 X, Neg), (shift2 X, Pos))
6210 //
6211 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
6212 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
6213 // to consider shift amounts with defined behavior.
6214 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
6215                            SelectionDAG &DAG) {
6216   // If EltSize is a power of 2 then:
6217   //
6218   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
6219   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
6220   //
6221   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
6222   // for the stronger condition:
6223   //
6224   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
6225   //
6226   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
6227   // we can just replace Neg with Neg' for the rest of the function.
6228   //
6229   // In other cases we check for the even stronger condition:
6230   //
6231   //     Neg == EltSize - Pos                                    [B]
6232   //
6233   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
6234   // behavior if Pos == 0 (and consequently Neg == EltSize).
6235   //
6236   // We could actually use [A] whenever EltSize is a power of 2, but the
6237   // only extra cases that it would match are those uninteresting ones
6238   // where Neg and Pos are never in range at the same time.  E.g. for
6239   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
6240   // as well as (sub 32, Pos), but:
6241   //
6242   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
6243   //
6244   // always invokes undefined behavior for 32-bit X.
6245   //
6246   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
6247   unsigned MaskLoBits = 0; // Low bits kept by Mask; 0 stands for all-ones.
6248   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
6249     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
6250       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
6251       unsigned Bits = Log2_64(EltSize); // Number of low bits [A] compares.
6252       if (NegC->getAPIntValue().getActiveBits() <= Bits &&
6253           ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) {
6254         Neg = Neg.getOperand(0); // Continue with Neg' in place of Neg.
6255         MaskLoBits = Bits;
6256       }
6257     }
6258   }
6259
6260   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
6261   if (Neg.getOpcode() != ISD::SUB)
6262     return false;
6263   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
6264   if (!NegC)
6265     return false;
6266   SDValue NegOp1 = Neg.getOperand(1);
6267
6268   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
6269   // Pos'.  The truncation is redundant for the purpose of the equality.
6270   if (MaskLoBits && Pos.getOpcode() == ISD::AND) {
6271     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) {
6272       KnownBits Known = DAG.computeKnownBits(Pos.getOperand(0));
6273       if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits &&
6274           ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >=
6275            MaskLoBits))
6276         Pos = Pos.getOperand(0);
6277     }
6278   }
6279
6280   // The condition we need is now:
6281   //
6282   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
6283   //
6284   // If NegOp1 == Pos then we need:
6285   //
6286   //              EltSize & Mask == NegC & Mask
6287   //
6288   // (because "x & Mask" is a truncation and distributes through subtraction).
6289   APInt Width; // The constant that must equal EltSize (modulo Mask).
6290   if (Pos == NegOp1)
6291     Width = NegC->getAPIntValue();
6292
6293   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
6294   // Then the condition we want to prove becomes:
6295   //
6296   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
6297   //
6298   // which, again because "x & Mask" is a truncation, becomes:
6299   //
6300   //                NegC & Mask == (EltSize - PosC) & Mask
6301   //             EltSize & Mask == (NegC + PosC) & Mask
6302   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
6303     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
6304       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
6305     else
6306       return false;
6307   } else
6308     return false; // Pos is unrelated to NegOp1; nothing can be proven.
6309
6310   // Now we just need to check that EltSize & Mask == Width & Mask.
6311   if (MaskLoBits)
6312     // EltSize & Mask is 0 since Mask is EltSize - 1.
6313     return Width.getLoBits(MaskLoBits) == 0;
6314   return Width == EltSize;
6315 }
6316
6317 // Final step of MatchRotate, run once an OR of two opposing shifts of
6318 // Shifted has been found.  When matchRotateSub accepts InnerPos/InnerNeg
6319 // (Pos and Neg with outer conversions stripped away), the OR is both
6320 // (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg).  The PosOpcode
6321 // form is built if the target reports it legal or custom, otherwise the
6322 // NegOpcode form.  Returns null when the amounts cannot be matched.
6323 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
6324                                        SDValue Neg, SDValue InnerPos,
6325                                        SDValue InnerNeg, unsigned PosOpcode,
6326                                        unsigned NegOpcode, const SDLoc &DL) {
6327   // Patterns handled (32 stands in for the element width):
6328   //   (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y))))
6329   //     -> (rotl x, y) or (rotr x, (sub 32, y))
6330   //   (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y)))
6331   //     -> (rotr x, y) or (rotl x, (sub 32, y))
6332   const EVT RotVT = Shifted.getValueType();
6333   const unsigned EltBits = RotVT.getScalarSizeInBits();
6334   if (!matchRotateSub(InnerPos, InnerNeg, EltBits, DAG))
6335     return nullptr;
6336
6337   // Prefer the rotate in the Pos direction; else use the opposite one.
6338   if (TLI.isOperationLegalOrCustom(PosOpcode, RotVT))
6339     return DAG.getNode(PosOpcode, DL, RotVT, Shifted, Pos).getNode();
6340   return DAG.getNode(NegOpcode, DL, RotVT, Shifted, Neg).getNode();
6341 }
6342
6343 // MatchRotate - Handle an 'or' of operands LHS and RHS.  If this is one of the
6344 // many idioms for rotate, and the target has ROTL or ROTR for the type, return
6345 // the replacement node (a rot[lr], possibly truncated or masked); else nullptr.
6346 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
6347   // Must be a legal type.  Expanded and promoted types won't work with rotates.
6348   EVT VT = LHS.getValueType();
6349   if (!TLI.isTypeLegal(VT)) return nullptr;
6350
6351   // The target must have at least one rotate flavor.
6352   bool HasROTL = hasOperation(ISD::ROTL, VT);
6353   bool HasROTR = hasOperation(ISD::ROTR, VT);
6354   if (!HasROTL && !HasROTR) return nullptr;
6355
6356   // Check for truncated rotate: match on the wide operands, then truncate.
6357   if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
6358       LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
6359     assert(LHS.getValueType() == RHS.getValueType());
6360     if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
6361       return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
6362                          SDValue(Rot, 0)).getNode();
6363     }
6364   }
6365
6366   // Match "(X shl/srl V1) & V2" where V2 may not be present.
6367   SDValue LHSShift;   // The shift.
6368   SDValue LHSMask;    // AND value if any.
6369   matchRotateHalf(DAG, LHS, LHSShift, LHSMask);
6370
6371   SDValue RHSShift;   // The shift.
6372   SDValue RHSMask;    // AND value if any.
6373   matchRotateHalf(DAG, RHS, RHSShift, RHSMask);
6374
6375   // If neither side matched a rotate half, bail
6376   if (!LHSShift && !RHSShift)
6377     return nullptr;
6378
6379   // InstCombine may have combined a constant shl, srl, mul, or udiv with one
6380   // side of the rotate, so try to handle that here. In all cases we need to
6381   // pass the matched shift from the opposite side to compute the opcode and
6382   // needed shift amount to extract.  We still want to do this if both sides
6383   // matched a rotate half because one half may be a potential overshift that
6384   // can be broken down (ie if InstCombine merged two shl or srl ops into a
6385   // single one).
6386
6387   // Have LHS side of the rotate, try to extract the needed shift from the RHS.
6388   if (LHSShift)
6389     if (SDValue NewRHSShift =
6390             extractShiftForRotate(DAG, LHSShift, RHS, RHSMask, DL))
6391       RHSShift = NewRHSShift;
6392   // Have RHS side of the rotate, try to extract the needed shift from the LHS.
6393   if (RHSShift)
6394     if (SDValue NewLHSShift =
6395             extractShiftForRotate(DAG, RHSShift, LHS, LHSMask, DL))
6396       LHSShift = NewLHSShift;
6397
6398   // If a side is still missing, nothing else we can do.
6399   if (!RHSShift || !LHSShift)
6400     return nullptr;
6401
6402   // At this point we've matched or extracted a shift op on each side.
6403
6404   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
6405     return nullptr;   // Not shifting the same value.
6406
6407   if (LHSShift.getOpcode() == RHSShift.getOpcode())
6408     return nullptr;   // Shifts must disagree.
6409
6410   // Canonicalize shl to left side in a shl/srl pair.
6411   if (RHSShift.getOpcode() == ISD::SHL) {
6412     std::swap(LHS, RHS);
6413     std::swap(LHSShift, RHSShift);
6414     std::swap(LHSMask, RHSMask);
6415   }
6416
6417   unsigned EltSizeInBits = VT.getScalarSizeInBits();
6418   SDValue LHSShiftArg = LHSShift.getOperand(0);
6419   SDValue LHSShiftAmt = LHSShift.getOperand(1);
6420   SDValue RHSShiftArg = RHSShift.getOperand(0);
6421   SDValue RHSShiftAmt = RHSShift.getOperand(1);
6422
6423   // If C1 + C2 == EltSizeInBits, fold (or (shl x, C1), (srl x, C2)) to
6424   // (rotl x, C1) when ROTL is available, else to (rotr x, C2).
6425   auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS,
6426                                         ConstantSDNode *RHS) {
6427     return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits;
6428   };
6429   if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) {
6430     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
6431                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
6432
6433     // If there is an AND of either shifted operand, apply it to the result.
6434     if (LHSMask.getNode() || RHSMask.getNode()) {
6435       SDValue AllOnes = DAG.getAllOnesConstant(DL, VT);
6436       SDValue Mask = AllOnes;
6437
6438       if (LHSMask.getNode()) { // Keep every bit supplied by the srl half.
6439         SDValue RHSBits = DAG.getNode(ISD::SRL, DL, VT, AllOnes, RHSShiftAmt);
6440         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6441                            DAG.getNode(ISD::OR, DL, VT, LHSMask, RHSBits));
6442       }
6443       if (RHSMask.getNode()) { // Keep every bit supplied by the shl half.
6444         SDValue LHSBits = DAG.getNode(ISD::SHL, DL, VT, AllOnes, LHSShiftAmt);
6445         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
6446                            DAG.getNode(ISD::OR, DL, VT, RHSMask, LHSBits));
6447       }
6448
6449       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
6450     }
6451
6452     return Rot.getNode();
6453   }
6454
6455   // If there is a mask here, and we have a variable shift, we can't be sure
6456   // that we're masking out the right stuff.
6457   if (LHSMask.getNode() || RHSMask.getNode())
6458     return nullptr;
6459
6460   // If both shift amounts are extended or truncated, peel the conversion off.
6461   SDValue LExtOp0 = LHSShiftAmt;
6462   SDValue RExtOp0 = RHSShiftAmt;
6463   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6464        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6465        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6466        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
6467       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
6468        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
6469        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
6470        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
6471     LExtOp0 = LHSShiftAmt.getOperand(0);
6472     RExtOp0 = RHSShiftAmt.getOperand(0);
6473   }
6474
6475   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
6476                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
6477   if (TryL)
6478     return TryL; // The srl amount is the complement of the shl amount.
6479
6480   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
6481                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
6482   if (TryR)
6483     return TryR; // The shl amount is the complement of the srl amount.
6484
6485   return nullptr;
6486 }
6487
6488 namespace {
6489
6490 /// Describes where a single byte of a load combine candidate comes from:
6491 /// either it is a constant zero, or it is a specific byte read from memory.
6492 struct ByteProvider {
6493   // Load is the node that reads the byte from memory, or nullptr when the
6494   // byte is a constant zero.  ByteOffset is the position of the byte within
6495   // the value produced by that load.
6496   LoadSDNode *Load = nullptr;
6497   unsigned ByteOffset = 0;
6498
6499   ByteProvider() = default;
6500
6501   // Named constructors; the two-argument constructor itself is private.
6502   static ByteProvider getConstantZero() { return ByteProvider(nullptr, 0); }
6503   static ByteProvider getMemory(LoadSDNode *Load, unsigned ByteOffset) {
6504     return ByteProvider(Load, ByteOffset);
6505   }
6506
6507   bool isMemory() const { return Load != nullptr; }
6508   bool isConstantZero() const { return Load == nullptr; }
6509
6510   bool operator==(const ByteProvider &Other) const {
6511     return Load == Other.Load && ByteOffset == Other.ByteOffset;
6512   }
6513
6514 private:
6515   ByteProvider(LoadSDNode *Load, unsigned ByteOffset)
6516       : Load(Load), ByteOffset(ByteOffset) {}
6517 };
6518
6519 } // end anonymous namespace
6520
6521 /// Recursively traverses the expression rooted at \p Op to find the origin of
6522 /// byte \p Index of its value, where index 0 is the least significant byte.
6523 /// Returns None if the provider can't be calculated, which includes reaching
6524 /// the recursion limit on \p Depth.
6525 ///
6526 /// Every value except the root of the expression (\p Root) must have exactly
6527 /// one use; otherwise None is returned. This way, if the origin of the byte is
6528 /// returned, it's guaranteed that the values which contribute to the byte are
6529 /// not used outside of this expression.
6530 /// Because of that one-use rule the function iterates over trees, not DAGs,
6531 /// so it never visits the same node more than once.
6532 static const Optional<ByteProvider>
6533 calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
6534                       bool Root = false) {
6535   // Typical i64 by i8 pattern requires recursion up to 8 calls depth
6536   if (Depth == 10)
6537     return None;
6538
6539   if (!Root && !Op.hasOneUse())
6540     return None;
6541
6542   assert(Op.getValueType().isScalarInteger() && "can't handle other types");
6543   unsigned BitWidth = Op.getValueSizeInBits();
6544   if (BitWidth % 8 != 0)
6545     return None; // Only values made of whole bytes are handled.
6546   unsigned ByteWidth = BitWidth / 8;
6547   assert(Index < ByteWidth && "invalid index requested");
6548   (void) ByteWidth;
6549
6550   switch (Op.getOpcode()) {
6551   case ISD::OR: { // Known only if one side provides a constant zero byte.
6552     auto LHS = calculateByteProvider(Op->getOperand(0), Index, Depth + 1);
6553     if (!LHS)
6554       return None;
6555     auto RHS = calculateByteProvider(Op->getOperand(1), Index, Depth + 1);
6556     if (!RHS)
6557       return None;
6558
6559     if (LHS->isConstantZero())
6560       return RHS;
6561     if (RHS->isConstantZero())
6562       return LHS;
6563     return None; // Both sides supply a memory byte; the result mixes them.
6564   }
6565   case ISD::SHL: { // Only constant shifts by a whole number of bytes.
6566     auto ShiftOp = dyn_cast<ConstantSDNode>(Op->getOperand(1));
6567     if (!ShiftOp)
6568       return None;
6569
6570     uint64_t BitShift = ShiftOp->getZExtValue();
6571     if (BitShift % 8 != 0)
6572       return None;
6573     uint64_t ByteShift = BitShift / 8;
6574
6575     return Index < ByteShift // Bytes below the shift amount are zero.
6576                ? ByteProvider::getConstantZero()
6577                : calculateByteProvider(Op->getOperand(0), Index - ByteShift,
6578                                        Depth + 1);
6579   }
6580   case ISD::ANY_EXTEND:
6581   case ISD::SIGN_EXTEND:
6582   case ISD::ZERO_EXTEND: {
6583     SDValue NarrowOp = Op->getOperand(0);
6584     unsigned NarrowBitWidth = NarrowOp.getScalarValueSizeInBits();
6585     if (NarrowBitWidth % 8 != 0)
6586       return None;
6587     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6588
6589     if (Index >= NarrowByteWidth) // Byte lies in the extended part.
6590       return Op.getOpcode() == ISD::ZERO_EXTEND
6591                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6592                  : None;
6593     return calculateByteProvider(NarrowOp, Index, Depth + 1);
6594   }
6595   case ISD::BSWAP: // Byte Index of the result is the mirrored input byte.
6596     return calculateByteProvider(Op->getOperand(0), ByteWidth - Index - 1,
6597                                  Depth + 1);
6598   case ISD::LOAD: {
6599     auto L = cast<LoadSDNode>(Op.getNode());
6600     if (L->isVolatile() || L->isIndexed())
6601       return None;
6602
6603     unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
6604     if (NarrowBitWidth % 8 != 0)
6605       return None;
6606     uint64_t NarrowByteWidth = NarrowBitWidth / 8;
6607
6608     if (Index >= NarrowByteWidth) // Byte lies beyond the loaded memory.
6609       return L->getExtensionType() == ISD::ZEXTLOAD
6610                  ? Optional<ByteProvider>(ByteProvider::getConstantZero())
6611                  : None;
6612     return ByteProvider::getMemory(L, Index);
6613   }
6614   }
6615
6616   return None; // Any other opcode: the origin of the byte is unknown.
6617 }
6618
6619 /// Offset at which byte \p i of a value is found in little-endian order: the
6620 /// byte index itself.  \p BW, the number of bytes, is not needed here.
6621 static unsigned LittleEndianByteAt(unsigned BW, unsigned i) { return i; }
6622
6623 /// Offset at which byte \p i of a \p BW byte value is found in big-endian
6624 /// order: the index counted from the opposite end.
6625 static unsigned BigEndianByteAt(unsigned BW, unsigned i) { return BW - 1 - i; }
6626
6627 // Decide whether the byte offsets (taken relative to FirstOffset) are laid
6628 // out as a big- or little-endian value. Returns true for big endian, false
6629 // for little endian, and None if they match neither or Width is below 2.
6630 static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
6631                                   int64_t FirstOffset) {
6632   // The endianness can only be decided when there are at least 2 bytes.
6633   unsigned Width = ByteOffsets.size();
6634   if (Width < 2)
6635     return None;
6636
6637   bool BigEndian = true, LittleEndian = true;
6638   for (unsigned i = 0; i < Width; i++) {
6639     int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
6640     LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
6641     BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
6642     if (!BigEndian && !LittleEndian)
6643       return None; // Neither ordering can match any more.
6644   }
6645   // For Width >= 2 the orderings differ at i == 0, so exactly one survives.
6646   assert((BigEndian != LittleEndian) &&
6647          "It should be either big endian or little endian");
6648   return BigEndian;
6649 }
6650
6651 static SDValue stripTruncAndExt(SDValue Value) {
6652   // Peel every leading truncate/extend node; return what lies beneath them.
6653   for (;;) {
6654     const unsigned Opc = Value.getOpcode();
6655     if (Opc != ISD::TRUNCATE && Opc != ISD::ZERO_EXTEND &&
6656         Opc != ISD::SIGN_EXTEND && Opc != ISD::ANY_EXTEND)
6657       return Value;
6658     Value = Value.getOperand(0);
6659   }
6660 }
6661
6662 /// Match a pattern where a wide type scalar value is stored by several narrow
6663 /// stores. Fold it into a single store, or a BSWAP and a store, if the target
6664 /// supports it.
6665 ///
6666 /// Assuming little endian target:
6667 ///  i8 *p = ...
6668 ///  i32 val = ...
6669 ///  p[0] = (val >> 0) & 0xFF;
6670 ///  p[1] = (val >> 8) & 0xFF;
6671 ///  p[2] = (val >> 16) & 0xFF;
6672 ///  p[3] = (val >> 24) & 0xFF;
6673 /// =>
6674 ///  *((i32)p) = val;
6675 ///
6676 ///  i8 *p = ...
6677 ///  i32 val = ...
6678 ///  p[0] = (val >> 24) & 0xFF;
6679 ///  p[1] = (val >> 16) & 0xFF;
6680 ///  p[2] = (val >> 8) & 0xFF;
6681 ///  p[3] = (val >> 0) & 0xFF;
6682 /// =>
6683 ///  *((i32)p) = BSWAP(val);
6684 SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
6685   // Collect all the stores in the chain.
6686   SDValue Chain;
6687   SmallVector<StoreSDNode *, 8> Stores;
6688   for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
6689     if (Store->getMemoryVT() != MVT::i8 ||
6690         Store->isVolatile() || Store->isIndexed())
6691       return SDValue();
6692     Stores.push_back(Store);
6693     Chain = Store->getChain();
6694   }
6695   // Handle the simple type only.
6696   unsigned Width = Stores.size();
6697   EVT VT = EVT::getIntegerVT(
6698     *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
6699   if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
6700     return SDValue();
6701
6702   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
6703   if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
6704     return SDValue();
6705
6706   // Check if all the bytes of the combined value we are looking at are stored
6707   // to the same base address. Collect bytes offsets from Base address into
6708   // ByteOffsets.
6709   SDValue CombinedValue;
6710   SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
6711   int64_t FirstOffset = INT64_MAX;
6712   StoreSDNode *FirstStore = nullptr;
6713   Optional<BaseIndexOffset> Base;
6714   for (auto Store : Stores) {
6715     // All the stores store different byte of the CombinedValue. A truncate is
6716     // required to get that byte value.
6717     SDValue Trunc = Store->getValue();
6718     if (Trunc.getOpcode() != ISD::TRUNCATE)
6719       return SDValue();
6720     // A shift operation is required to get the right byte offset, except the
6721     // first byte.
6722     int64_t Offset = 0;
6723     SDValue Value = Trunc.getOperand(0);
6724     if (Value.getOpcode() == ISD::SRL ||
6725         Value.getOpcode() == ISD::SRA) {
6726       ConstantSDNode *ShiftOffset =
6727         dyn_cast<ConstantSDNode>(Value.getOperand(1));
6728       // Trying to match the following pattern. The shift offset must be
6729       // a constant and a multiple of 8. It is the byte offset in "y".
6730       //
6731       // x = srl y, offset
6732       // i8 z = trunc x
6733       // store z, ...
6734       if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
6735         return SDValue();
6736
6737      Offset = ShiftOffset->getSExtValue()/8;
6738      Value = Value.getOperand(0);
6739     }
6740
6741     // Stores must share the same combined value with different offsets.
6742     if (!CombinedValue)
6743       CombinedValue = Value;
6744     else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
6745       return SDValue();
6746
6747     // The trunc and all the extend operation should be stripped to get the
6748     // real value we are stored.
6749     else if (CombinedValue.getValueType() != VT) {
6750       if (Value.getValueType() == VT ||
6751           Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
6752         CombinedValue = Value;
6753       // Give up if the combined value type is smaller than the store size.
6754       if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
6755         return SDValue();
6756     }
6757
6758     // Stores must share the same base address
6759     BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
6760     int64_t ByteOffsetFromBase = 0;
6761     if (!Base)
6762       Base = Ptr;
6763     else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
6764       return SDValue();
6765
6766     // Remember the first byte store
6767     if (ByteOffsetFromBase < FirstOffset) {
6768       FirstStore = Store;
6769       FirstOffset = ByteOffsetFromBase;
6770     }
6771     // Map the offset in the store and the offset in the combined value, and
6772     // early return if it has been set before.
6773     if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
6774       return SDValue();
6775     ByteOffsets[Offset] = ByteOffsetFromBase;
6776   }
6777
6778   assert(FirstOffset != INT64_MAX && "First byte offset must be set");
6779   assert(FirstStore && "First store must be set");
6780
6781   // Check if the bytes of the combined value we are looking at match with
6782   // either big or little endian value store.
6783   Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
6784   if (!IsBigEndian.hasValue())
6785     return SDValue();
6786
6787   // The node we are looking at matches with the pattern, check if we can
6788   // replace it with a single bswap if needed and store.
6789
6790   // If the store needs byte swap check if the target supports it
6791   bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
6792
6793   // Before legalize we can introduce illegal bswaps which will be later
6794   // converted to an explicit bswap sequence. This way we end up with a single
6795   // store and byte shuffling instead of several stores and byte shuffling.
6796   if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
6797     return SDValue();
6798
6799   // Check that a store of the wide type is both allowed and fast on the target
6800   bool Fast = false;
6801   bool Allowed =
6802       TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
6803                              *FirstStore->getMemOperand(), &Fast);
6804   if (!Allowed || !Fast)
6805     return SDValue();
6806
6807   if (VT != CombinedValue.getValueType()) {
6808     assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
6809            "Get unexpected store value to combine");
6810     CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
6811                              CombinedValue);
6812   }
6813
6814   if (NeedsBswap)
6815     CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
6816
6817   SDValue NewStore =
6818     DAG.getStore(Chain, SDLoc(N),  CombinedValue, FirstStore->getBasePtr(),
6819                  FirstStore->getPointerInfo(), FirstStore->getAlignment());
6820
6821   // Rely on other DAG combine rules to remove the other individual stores.
6822   DAG.ReplaceAllUsesWith(N, NewStore.getNode());
6823   return NewStore;
6824 }
6825
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
///
/// Assuming little endian target:
///  i8 *a = ...
///  i32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
/// =>
///  i32 val = *((i32)a)
///
///  i8 *a = ...
///  i32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
/// =>
///  i32 val = BSWAP(*((i32)a))
///
/// \param N the ISD::OR node that roots the candidate pattern.
/// \returns the wide load (wrapped in a BSWAP when the byte order is the
///          opposite of the target's), or an empty SDValue if the pattern does
///          not match or the wide access is not allowed and fast.
///
/// TODO: This rule matches complex patterns with OR node roots and doesn't
/// interact well with the worklist mechanism. When a part of the pattern is
/// updated (e.g. one of the loads) its direct users are put into the worklist,
/// but the root node of the pattern which triggers the load combine is not
/// necessarily a direct user of the changed node. For example, once the address
/// of t28 load is reassociated load combine won't be triggered:
///             t25: i32 = add t4, Constant:i32<2>
///           t26: i64 = sign_extend t25
///        t27: i64 = add t2, t26
///       t28: i8,ch = load<LD1[%tmp9]> t0, t27, undef:i64
///     t29: i32 = zero_extend t28
///   t32: i32 = shl t29, Constant:i8<8>
/// t33: i32 = or t23, t32
/// As a possible fix visitLoad can check if the load can be a part of a load
/// combine pattern and add corresponding OR roots to the worklist.
SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
  assert(N->getOpcode() == ISD::OR &&
         "Can only match load combining against OR nodes");

  // Handles simple types only
  EVT VT = N->getValueType(0);
  if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
    return SDValue();
  unsigned ByteWidth = VT.getSizeInBits() / 8;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Before legalize we can introduce too wide illegal loads which will be later
  // split into legal sized loads. This enables us to combine i64 load by i8
  // patterns to a couple of i32 loads on 32 bit targets.
  if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
    return SDValue();

  bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
  // Translate the byte index recorded in a memory byte provider into an offset
  // within the memory accessed by its load, using the target's endianness.
  auto MemoryByteOffset = [&] (ByteProvider P) {
    assert(P.isMemory() && "Must be a memory byte provider");
    unsigned LoadBitWidth = P.Load->getMemoryVT().getSizeInBits();
    assert(LoadBitWidth % 8 == 0 &&
           "can only analyze providers for individual bytes not bit");
    unsigned LoadByteWidth = LoadBitWidth / 8;
    return IsBigEndianTarget
            ? BigEndianByteAt(LoadByteWidth, P.ByteOffset)
            : LittleEndianByteAt(LoadByteWidth, P.ByteOffset);
  };

  // Base address and chain shared by all loads; both are taken from the first
  // load visited and every later load must agree with them.
  Optional<BaseIndexOffset> Base;
  SDValue Chain;

  // Distinct loads feeding the OR, and the provider of the byte with the
  // lowest memory offset seen so far.
  SmallPtrSet<LoadSDNode *, 8> Loads;
  Optional<ByteProvider> FirstByteProvider;
  int64_t FirstOffset = INT64_MAX;

  // Check if all the bytes of the OR we are looking at are loaded from the same
  // base address. Collect bytes offsets from Base address in ByteOffsets.
  SmallVector<int64_t, 4> ByteOffsets(ByteWidth);
  for (unsigned i = 0; i < ByteWidth; i++) {
    auto P = calculateByteProvider(SDValue(N, 0), i, 0, /*Root=*/true);
    if (!P || !P->isMemory()) // All the bytes must be loaded from memory
      return SDValue();

    LoadSDNode *L = P->Load;
    assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
           "Must be enforced by calculateByteProvider");
    assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");

    // All loads must share the same chain
    SDValue LChain = L->getChain();
    if (!Chain)
      Chain = LChain;
    else if (Chain != LChain)
      return SDValue();

    // Loads must share the same base address
    BaseIndexOffset Ptr = BaseIndexOffset::match(L, DAG);
    int64_t ByteOffsetFromBase = 0;
    if (!Base)
      Base = Ptr;
    else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
      return SDValue();

    // Calculate the offset of the current byte from the base address
    ByteOffsetFromBase += MemoryByteOffset(*P);
    ByteOffsets[i] = ByteOffsetFromBase;

    // Remember the first byte load
    if (ByteOffsetFromBase < FirstOffset) {
      FirstByteProvider = P;
      FirstOffset = ByteOffsetFromBase;
    }

    Loads.insert(L);
  }
  assert(!Loads.empty() && "All the bytes of the value must be loaded from "
         "memory, so there must be at least one load which produces the value");
  assert(Base && "Base address of the accessed memory location must be set");
  assert(FirstOffset != INT64_MAX && "First byte offset must be set");

  // Check if the bytes of the OR we are looking at match with either big or
  // little endian value load
  Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
  if (!IsBigEndian.hasValue())
    return SDValue();

  assert(FirstByteProvider && "must be set");

  // Ensure that the first byte is loaded from zero offset of the first load.
  // So the combined value can be loaded from the first load address.
  if (MemoryByteOffset(*FirstByteProvider) != 0)
    return SDValue();
  LoadSDNode *FirstLoad = FirstByteProvider->Load;

  // The node we are looking at matches with the pattern, check if we can
  // replace it with a single load and bswap if needed.

  // If the load needs byte swap check if the target supports it
  bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;

  // Before legalize we can introduce illegal bswaps which will be later
  // converted to an explicit bswap sequence. This way we end up with a single
  // load and byte shuffling instead of several loads and byte shuffling.
  if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Check that a load of the wide type is both allowed and fast on the target
  bool Fast = false;
  bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
                                        VT, *FirstLoad->getMemOperand(), &Fast);
  if (!Allowed || !Fast)
    return SDValue();

  // Build the wide load at the address of the lowest-addressed narrow load,
  // on the chain shared by all the narrow loads.
  SDValue NewLoad =
      DAG.getLoad(VT, SDLoc(N), Chain, FirstLoad->getBasePtr(),
                  FirstLoad->getPointerInfo(), FirstLoad->getAlignment());

  // Transfer chain users from old loads to the new load.
  for (LoadSDNode *L : Loads)
    DAG.ReplaceAllUsesOfValueWith(SDValue(L, 1), SDValue(NewLoad.getNode(), 1));

  return NeedsBswap ? DAG.getNode(ISD::BSWAP, SDLoc(N), VT, NewLoad) : NewLoad;
}
6980
6981 // If the target has andn, bsl, or a similar bit-select instruction,
6982 // we want to unfold masked merge, with canonical pattern of:
6983 //   |        A  |  |B|
6984 //   ((x ^ y) & m) ^ y
6985 //    |  D  |
6986 // Into:
6987 //   (x & m) | (y & ~m)
6988 // If y is a constant, and the 'andn' does not work with immediates,
6989 // we unfold into a different pattern:
6990 //   ~(~x & m) & (m | y)
6991 // NOTE: we don't unfold the pattern if 'xor' is actually a 'not', because at
6992 //       the very least that breaks andnpd / andnps patterns, and because those
6993 //       patterns are simplified in IR and shouldn't be created in the DAG
6994 SDValue DAGCombiner::unfoldMaskedMerge(SDNode *N) {
6995   assert(N->getOpcode() == ISD::XOR);
6996
6997   // Don't touch 'not' (i.e. where y = -1).
6998   if (isAllOnesOrAllOnesSplat(N->getOperand(1)))
6999     return SDValue();
7000
7001   EVT VT = N->getValueType(0);
7002
7003   // There are 3 commutable operators in the pattern,
7004   // so we have to deal with 8 possible variants of the basic pattern.
7005   SDValue X, Y, M;
7006   auto matchAndXor = [&X, &Y, &M](SDValue And, unsigned XorIdx, SDValue Other) {
7007     if (And.getOpcode() != ISD::AND || !And.hasOneUse())
7008       return false;
7009     SDValue Xor = And.getOperand(XorIdx);
7010     if (Xor.getOpcode() != ISD::XOR || !Xor.hasOneUse())
7011       return false;
7012     SDValue Xor0 = Xor.getOperand(0);
7013     SDValue Xor1 = Xor.getOperand(1);
7014     // Don't touch 'not' (i.e. where y = -1).
7015     if (isAllOnesOrAllOnesSplat(Xor1))
7016       return false;
7017     if (Other == Xor0)
7018       std::swap(Xor0, Xor1);
7019     if (Other != Xor1)
7020       return false;
7021     X = Xor0;
7022     Y = Xor1;
7023     M = And.getOperand(XorIdx ? 0 : 1);
7024     return true;
7025   };
7026
7027   SDValue N0 = N->getOperand(0);
7028   SDValue N1 = N->getOperand(1);
7029   if (!matchAndXor(N0, 0, N1) && !matchAndXor(N0, 1, N1) &&
7030       !matchAndXor(N1, 0, N0) && !matchAndXor(N1, 1, N0))
7031     return SDValue();
7032
7033   // Don't do anything if the mask is constant. This should not be reachable.
7034   // InstCombine should have already unfolded this pattern, and DAGCombiner
7035   // probably shouldn't produce it, too.
7036   if (isa<ConstantSDNode>(M.getNode()))
7037     return SDValue();
7038
7039   // We can transform if the target has AndNot
7040   if (!TLI.hasAndNot(M))
7041     return SDValue();
7042
7043   SDLoc DL(N);
7044
7045   // If Y is a constant, check that 'andn' works with immediates.
7046   if (!TLI.hasAndNot(Y)) {
7047     assert(TLI.hasAndNot(X) && "Only mask is a variable? Unreachable.");
7048     // If not, we need to do a bit more work to make sure andn is still used.
7049     SDValue NotX = DAG.getNOT(DL, X, VT);
7050     SDValue LHS = DAG.getNode(ISD::AND, DL, VT, NotX, M);
7051     SDValue NotLHS = DAG.getNOT(DL, LHS, VT);
7052     SDValue RHS = DAG.getNode(ISD::OR, DL, VT, M, Y);
7053     return DAG.getNode(ISD::AND, DL, VT, NotLHS, RHS);
7054   }
7055
7056   SDValue LHS = DAG.getNode(ISD::AND, DL, VT, X, M);
7057   SDValue NotM = DAG.getNOT(DL, M, VT);
7058   SDValue RHS = DAG.getNode(ISD::AND, DL, VT, Y, NotM);
7059
7060   return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
7061 }
7062
7063 SDValue DAGCombiner::visitXOR(SDNode *N) {
7064   SDValue N0 = N->getOperand(0);
7065   SDValue N1 = N->getOperand(1);
7066   EVT VT = N0.getValueType();
7067
7068   // fold vector ops
7069   if (VT.isVector()) {
7070     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7071       return FoldedVOp;
7072
7073     // fold (xor x, 0) -> x, vector edition
7074     if (ISD::isBuildVectorAllZeros(N0.getNode()))
7075       return N1;
7076     if (ISD::isBuildVectorAllZeros(N1.getNode()))
7077       return N0;
7078   }
7079
7080   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
7081   SDLoc DL(N);
7082   if (N0.isUndef() && N1.isUndef())
7083     return DAG.getConstant(0, DL, VT);
7084   // fold (xor x, undef) -> undef
7085   if (N0.isUndef())
7086     return N0;
7087   if (N1.isUndef())
7088     return N1;
7089   // fold (xor c1, c2) -> c1^c2
7090   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7091   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
7092   if (N0C && N1C)
7093     return DAG.FoldConstantArithmetic(ISD::XOR, DL, VT, N0C, N1C);
7094   // canonicalize constant to RHS
7095   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
7096      !DAG.isConstantIntBuildVectorOrConstantInt(N1))
7097     return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
7098   // fold (xor x, 0) -> x
7099   if (isNullConstant(N1))
7100     return N0;
7101
7102   if (SDValue NewSel = foldBinOpIntoSelect(N))
7103     return NewSel;
7104
7105   // reassociate xor
7106   if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
7107     return RXOR;
7108
7109   // fold !(x cc y) -> (x !cc y)
7110   unsigned N0Opcode = N0.getOpcode();
7111   SDValue LHS, RHS, CC;
7112   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
7113     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
7114                                                LHS.getValueType().isInteger());
7115     if (!LegalOperations ||
7116         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
7117       switch (N0Opcode) {
7118       default:
7119         llvm_unreachable("Unhandled SetCC Equivalent!");
7120       case ISD::SETCC:
7121         return DAG.getSetCC(SDLoc(N0), VT, LHS, RHS, NotCC);
7122       case ISD::SELECT_CC:
7123         return DAG.getSelectCC(SDLoc(N0), LHS, RHS, N0.getOperand(2),
7124                                N0.getOperand(3), NotCC);
7125       }
7126     }
7127   }
7128
7129   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
7130   if (isOneConstant(N1) && N0Opcode == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7131       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
7132     SDValue V = N0.getOperand(0);
7133     SDLoc DL0(N0);
7134     V = DAG.getNode(ISD::XOR, DL0, V.getValueType(), V,
7135                     DAG.getConstant(1, DL0, V.getValueType()));
7136     AddToWorklist(V.getNode());
7137     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, V);
7138   }
7139
7140   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
7141   if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
7142       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7143     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7144     if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
7145       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7146       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7147       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7148       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7149       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7150     }
7151   }
7152   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
7153   if (isAllOnesConstant(N1) && N0.hasOneUse() &&
7154       (N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
7155     SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
7156     if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
7157       unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
7158       N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
7159       N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
7160       AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
7161       return DAG.getNode(NewOpcode, DL, VT, N00, N01);
7162     }
7163   }
7164
7165   // fold (not (neg x)) -> (add X, -1)
7166   // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
7167   // Y is a constant or the subtract has a single use.
7168   if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
7169       isNullConstant(N0.getOperand(0))) {
7170     return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
7171                        DAG.getAllOnesConstant(DL, VT));
7172   }
7173
7174   // fold (xor (and x, y), y) -> (and (not x), y)
7175   if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
7176     SDValue X = N0.getOperand(0);
7177     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
7178     AddToWorklist(NotX.getNode());
7179     return DAG.getNode(ISD::AND, DL, VT, NotX, N1);
7180   }
7181
7182   if ((N0Opcode == ISD::SRL || N0Opcode == ISD::SHL) && N0.hasOneUse()) {
7183     ConstantSDNode *XorC = isConstOrConstSplat(N1);
7184     ConstantSDNode *ShiftC = isConstOrConstSplat(N0.getOperand(1));
7185     unsigned BitWidth = VT.getScalarSizeInBits();
7186     if (XorC && ShiftC) {
7187       // Don't crash on an oversized shift. We can not guarantee that a bogus
7188       // shift has been simplified to undef.
7189       uint64_t ShiftAmt = ShiftC->getLimitedValue();
7190       if (ShiftAmt < BitWidth) {
7191         APInt Ones = APInt::getAllOnesValue(BitWidth);
7192         Ones = N0Opcode == ISD::SHL ? Ones.shl(ShiftAmt) : Ones.lshr(ShiftAmt);
7193         if (XorC->getAPIntValue() == Ones) {
7194           // If the xor constant is a shifted -1, do a 'not' before the shift:
7195           // xor (X << ShiftC), XorC --> (not X) << ShiftC
7196           // xor (X >> ShiftC), XorC --> (not X) >> ShiftC
7197           SDValue Not = DAG.getNOT(DL, N0.getOperand(0), VT);
7198           return DAG.getNode(N0Opcode, DL, VT, Not, N0.getOperand(1));
7199         }
7200       }
7201     }
7202   }
7203
7204   // fold Y = sra (X, size(X)-1); xor (add (X, Y), Y) -> (abs X)
7205   if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
7206     SDValue A = N0Opcode == ISD::ADD ? N0 : N1;
7207     SDValue S = N0Opcode == ISD::SRA ? N0 : N1;
7208     if (A.getOpcode() == ISD::ADD && S.getOpcode() == ISD::SRA) {
7209       SDValue A0 = A.getOperand(0), A1 = A.getOperand(1);
7210       SDValue S0 = S.getOperand(0);
7211       if ((A0 == S && A1 == S0) || (A1 == S && A0 == S0)) {
7212         unsigned OpSizeInBits = VT.getScalarSizeInBits();
7213         if (ConstantSDNode *C = isConstOrConstSplat(S.getOperand(1)))
7214           if (C->getAPIntValue() == (OpSizeInBits - 1))
7215             return DAG.getNode(ISD::ABS, DL, VT, S0);
7216       }
7217     }
7218   }
7219
7220   // fold (xor x, x) -> 0
7221   if (N0 == N1)
7222     return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations);
7223
7224   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
7225   // Here is a concrete example of this equivalence:
7226   // i16   x ==  14
7227   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
7228   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
7229   //
7230   // =>
7231   //
7232   // i16     ~1      == 0b1111111111111110
7233   // i16 rol(~1, 14) == 0b1011111111111111
7234   //
7235   // Some additional tips to help conceptualize this transform:
7236   // - Try to see the operation as placing a single zero in a value of all ones.
7237   // - There exists no value for x which would allow the result to contain zero.
7238   // - Values of x larger than the bitwidth are undefined and do not require a
7239   //   consistent result.
7240   // - Pushing the zero left requires shifting one bits in from the right.
7241   // A rotate left of ~1 is a nice way of achieving the desired result.
7242   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0Opcode == ISD::SHL &&
7243       isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
7244     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
7245                        N0.getOperand(1));
7246   }
7247
7248   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
7249   if (N0Opcode == N1.getOpcode())
7250     if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
7251       return V;
7252
7253   // Unfold  ((x ^ y) & m) ^ y  into  (x & m) | (y & ~m)  if profitable
7254   if (SDValue MM = unfoldMaskedMerge(N))
7255     return MM;
7256
7257   // Simplify the expression using non-local knowledge.
7258   if (SimplifyDemandedBits(SDValue(N, 0)))
7259     return SDValue(N, 0);
7260
7261   return SDValue();
7262 }
7263
7264 /// If we have a shift-by-constant of a bitwise logic op that itself has a
7265 /// shift-by-constant operand with identical opcode, we may be able to convert
7266 /// that into 2 independent shifts followed by the logic op. This is a
7267 /// throughput improvement.
7268 static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
7269   // Match a one-use bitwise logic op.
7270   SDValue LogicOp = Shift->getOperand(0);
7271   if (!LogicOp.hasOneUse())
7272     return SDValue();
7273
7274   unsigned LogicOpcode = LogicOp.getOpcode();
7275   if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
7276       LogicOpcode != ISD::XOR)
7277     return SDValue();
7278
7279   // Find a matching one-use shift by constant.
7280   unsigned ShiftOpcode = Shift->getOpcode();
7281   SDValue C1 = Shift->getOperand(1);
7282   ConstantSDNode *C1Node = isConstOrConstSplat(C1);
7283   assert(C1Node && "Expected a shift with constant operand");
7284   const APInt &C1Val = C1Node->getAPIntValue();
7285   auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
7286                              const APInt *&ShiftAmtVal) {
7287     if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
7288       return false;
7289
7290     ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
7291     if (!ShiftCNode)
7292       return false;
7293
7294     // Capture the shifted operand and shift amount value.
7295     ShiftOp = V.getOperand(0);
7296     ShiftAmtVal = &ShiftCNode->getAPIntValue();
7297
7298     // Shift amount types do not have to match their operand type, so check that
7299     // the constants are the same width.
7300     if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
7301       return false;
7302
7303     // The fold is not valid if the sum of the shift values exceeds bitwidth.
7304     if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
7305       return false;
7306
7307     return true;
7308   };
7309
7310   // Logic ops are commutative, so check each operand for a match.
7311   SDValue X, Y;
7312   const APInt *C0Val;
7313   if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
7314     Y = LogicOp.getOperand(1);
7315   else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
7316     Y = LogicOp.getOperand(0);
7317   else
7318     return SDValue();
7319
7320   // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
7321   SDLoc DL(Shift);
7322   EVT VT = Shift->getValueType(0);
7323   EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
7324   SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
7325   SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
7326   SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
7327   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
7328 }
7329
7330 /// Handle transforms common to the three shifts, when the shift amount is a
7331 /// constant.
7332 /// We are looking for: (shift being one of shl/sra/srl)
7333 ///   shift (binop X, C0), C1
7334 /// And want to transform into:
7335 ///   binop (shift X, C1), (shift C0, C1)
7336 SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
7337   assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
7338
7339   // Do not turn a 'not' into a regular xor.
7340   if (isBitwiseNot(N->getOperand(0)))
7341     return SDValue();
7342
7343   // The inner binop must be one-use, since we want to replace it.
7344   SDValue LHS = N->getOperand(0);
7345   if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
7346     return SDValue();
7347
7348   // TODO: This is limited to early combining because it may reveal regressions
7349   //       otherwise. But since we just checked a target hook to see if this is
7350   //       desirable, that should have filtered out cases where this interferes
7351   //       with some other pattern matching.
7352   if (!LegalTypes)
7353     if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
7354       return R;
7355
7356   // We want to pull some binops through shifts, so that we have (and (shift))
7357   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
7358   // thing happens with address calculations, so it's important to canonicalize
7359   // it.
7360   switch (LHS.getOpcode()) {
7361   default:
7362     return SDValue();
7363   case ISD::OR:
7364   case ISD::XOR:
7365   case ISD::AND:
7366     break;
7367   case ISD::ADD:
7368     if (N->getOpcode() != ISD::SHL)
7369       return SDValue(); // only shl(add) not sr[al](add).
7370     break;
7371   }
7372
7373   // We require the RHS of the binop to be a constant and not opaque as well.
7374   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
7375   if (!BinOpCst)
7376     return SDValue();
7377
7378   // FIXME: disable this unless the input to the binop is a shift by a constant
7379   // or is copy/select. Enable this in other cases when figure out it's exactly
7380   // profitable.
7381   SDValue BinOpLHSVal = LHS.getOperand(0);
7382   bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
7383                             BinOpLHSVal.getOpcode() == ISD::SRA ||
7384                             BinOpLHSVal.getOpcode() == ISD::SRL) &&
7385                            isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
7386   bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
7387                         BinOpLHSVal.getOpcode() == ISD::SELECT;
7388
7389   if (!IsShiftByConstant && !IsCopyOrSelect)
7390     return SDValue();
7391
7392   if (IsCopyOrSelect && N->hasOneUse())
7393     return SDValue();
7394
7395   // Fold the constants, shifting the binop RHS by the shift amount.
7396   SDLoc DL(N);
7397   EVT VT = N->getValueType(0);
7398   SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
7399                                N->getOperand(1));
7400   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
7401
7402   SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
7403                                  N->getOperand(1));
7404   return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
7405 }
7406
7407 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
7408   assert(N->getOpcode() == ISD::TRUNCATE);
7409   assert(N->getOperand(0).getOpcode() == ISD::AND);
7410
7411   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
7412   EVT TruncVT = N->getValueType(0);
7413   if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
7414       TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
7415     SDValue N01 = N->getOperand(0).getOperand(1);
7416     if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
7417       SDLoc DL(N);
7418       SDValue N00 = N->getOperand(0).getOperand(0);
7419       SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
7420       SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
7421       AddToWorklist(Trunc00.getNode());
7422       AddToWorklist(Trunc01.getNode());
7423       return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
7424     }
7425   }
7426
7427   return SDValue();
7428 }
7429
7430 SDValue DAGCombiner::visitRotate(SDNode *N) {
7431   SDLoc dl(N);
7432   SDValue N0 = N->getOperand(0);
7433   SDValue N1 = N->getOperand(1);
7434   EVT VT = N->getValueType(0);
7435   unsigned Bitsize = VT.getScalarSizeInBits();
7436
7437   // fold (rot x, 0) -> x
7438   if (isNullOrNullSplat(N1))
7439     return N0;
7440
7441   // fold (rot x, c) -> x iff (c % BitSize) == 0
7442   if (isPowerOf2_32(Bitsize) && Bitsize > 1) {
7443     APInt ModuloMask(N1.getScalarValueSizeInBits(), Bitsize - 1);
7444     if (DAG.MaskedValueIsZero(N1, ModuloMask))
7445       return N0;
7446   }
7447
7448   // fold (rot x, c) -> (rot x, c % BitSize)
7449   // TODO - support non-uniform vector amounts.
7450   if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
7451     if (Cst->getAPIntValue().uge(Bitsize)) {
7452       uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
7453       return DAG.getNode(N->getOpcode(), dl, VT, N0,
7454                          DAG.getConstant(RotAmt, dl, N1.getValueType()));
7455     }
7456   }
7457
7458   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
7459   if (N1.getOpcode() == ISD::TRUNCATE &&
7460       N1.getOperand(0).getOpcode() == ISD::AND) {
7461     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7462       return DAG.getNode(N->getOpcode(), dl, VT, N0, NewOp1);
7463   }
7464
7465   unsigned NextOp = N0.getOpcode();
7466   // fold (rot* (rot* x, c2), c1) -> (rot* x, c1 +- c2 % bitsize)
7467   if (NextOp == ISD::ROTL || NextOp == ISD::ROTR) {
7468     SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N1);
7469     SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1));
7470     if (C1 && C2 && C1->getValueType(0) == C2->getValueType(0)) {
7471       EVT ShiftVT = C1->getValueType(0);
7472       bool SameSide = (N->getOpcode() == NextOp);
7473       unsigned CombineOp = SameSide ? ISD::ADD : ISD::SUB;
7474       if (SDValue CombinedShift =
7475               DAG.FoldConstantArithmetic(CombineOp, dl, ShiftVT, C1, C2)) {
7476         SDValue BitsizeC = DAG.getConstant(Bitsize, dl, ShiftVT);
7477         SDValue CombinedShiftNorm = DAG.FoldConstantArithmetic(
7478             ISD::SREM, dl, ShiftVT, CombinedShift.getNode(),
7479             BitsizeC.getNode());
7480         return DAG.getNode(N->getOpcode(), dl, VT, N0->getOperand(0),
7481                            CombinedShiftNorm);
7482       }
7483     }
7484   }
7485   return SDValue();
7486 }
7487
7488 SDValue DAGCombiner::visitSHL(SDNode *N) {
       // Combines for a left-shift node N (operand 0 is the value, operand 1 the
       // shift amount). The folds below are attempted in the order written; the
       // first one that applies returns its replacement value, and an empty
       // SDValue is returned when none applies.
7489   SDValue N0 = N->getOperand(0);
7490   SDValue N1 = N->getOperand(1);
       // Give the DAG-level shift simplifier the first chance.
7491   if (SDValue V = DAG.simplifyShift(N0, N1))
7492     return V;
7493
7494   EVT VT = N0.getValueType();
7495   EVT ShiftVT = N1.getValueType();
7496   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7497
7498   // fold vector ops
7499   if (VT.isVector()) {
7500     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7501       return FoldedVOp;
7502
7503     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
7504     // If setcc produces all-one true value then:
7505     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
7506     if (N1CV && N1CV->isConstant()) {
7507       if (N0.getOpcode() == ISD::AND) {
7508         SDValue N00 = N0->getOperand(0);
7509         SDValue N01 = N0->getOperand(1);
7510         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
7511
7512         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
7513             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
7514                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
               // Only rewrite when the shifted mask folds to a constant.
7515           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
7516                                                      N01CV, N1CV))
7517             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
7518         }
7519       }
7520     }
7521   }
7522
       // N1C is non-null only when isConstOrConstSplat recognizes the shift
       // amount; several folds below are gated on it.
7523   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7524
7525   // fold (shl c1, c2) -> c1<<c2
7526   // TODO - support non-uniform vector shift amounts.
7527   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7528   if (N0C && N1C && !N1C->isOpaque())
7529     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
7530
7531   if (SDValue NewSel = foldBinOpIntoSelect(N))
7532     return NewSel;
7533
7534   // if (shl x, c) is known to be zero, return 0
7535   if (DAG.MaskedValueIsZero(SDValue(N, 0),
7536                             APInt::getAllOnesValue(OpSizeInBits)))
7537     return DAG.getConstant(0, SDLoc(N), VT);
7538
7539   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
7540   if (N1.getOpcode() == ISD::TRUNCATE &&
7541       N1.getOperand(0).getOpcode() == ISD::AND) {
7542     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7543       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
7544   }
7545
7546   // TODO - support non-uniform vector shift amounts.
       // NOTE(review): returning N itself presumably tells the combiner driver
       // that the node was updated in place -- confirm against the caller.
7547   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7548     return SDValue(N, 0);
7549
7550   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
7551   if (N0.getOpcode() == ISD::SHL) {
         // True when the two amounts add up to at least the operand width. The
         // amounts are widened by one extra bit so the addition cannot wrap.
7552     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7553                                           ConstantSDNode *RHS) {
7554       APInt c1 = LHS->getAPIntValue();
7555       APInt c2 = RHS->getAPIntValue();
7556       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7557       return (c1 + c2).uge(OpSizeInBits);
7558     };
7559     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7560       return DAG.getConstant(0, SDLoc(N), VT);
7561
         // True when the summed amount is still below the operand width.
7562     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7563                                        ConstantSDNode *RHS) {
7564       APInt c1 = LHS->getAPIntValue();
7565       APInt c2 = RHS->getAPIntValue();
7566       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7567       return (c1 + c2).ult(OpSizeInBits);
7568     };
7569     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7570       SDLoc DL(N);
7571       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7572       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
7573     }
7574   }
7575
7576   // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
7577   // For this to be valid, the second form must not preserve any of the bits
7578   // that are shifted out by the inner shift in the first form.  This means
7579   // the outer shift size must be >= the number of bits added by the ext.
7580   // As a corollary, we don't care what kind of ext it is.
7581   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
7582        N0.getOpcode() == ISD::ANY_EXTEND ||
7583        N0.getOpcode() == ISD::SIGN_EXTEND) &&
7584       N0.getOperand(0).getOpcode() == ISD::SHL) {
7585     SDValue N0Op0 = N0.getOperand(0);
7586     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7587     EVT InnerVT = N0Op0.getValueType();
7588     uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
7589
         // Note the operand order: LHS/c1 is the inner amount, RHS/c2 the outer
         // one. c2 must cover the bits added by the extension.
7590     auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7591                                                          ConstantSDNode *RHS) {
7592       APInt c1 = LHS->getAPIntValue();
7593       APInt c2 = RHS->getAPIntValue();
7594       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7595       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7596              (c1 + c2).uge(OpSizeInBits);
7597     };
7598     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
7599                                   /*AllowUndefs*/ false,
7600                                   /*AllowTypeMismatch*/ true))
7601       return DAG.getConstant(0, SDLoc(N), VT);
7602
7603     auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
7604                                                       ConstantSDNode *RHS) {
7605       APInt c1 = LHS->getAPIntValue();
7606       APInt c2 = RHS->getAPIntValue();
7607       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7608       return c2.uge(OpSizeInBits - InnerBitwidth) &&
7609              (c1 + c2).ult(OpSizeInBits);
7610     };
7611     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
7612                                   /*AllowUndefs*/ false,
7613                                   /*AllowTypeMismatch*/ true)) {
7614       SDLoc DL(N);
7615       SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
           // The inner amount may have a different type; bring it to ShiftVT
           // before adding the outer amount.
7616       SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
7617       Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
7618       return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
7619     }
7620   }
7621
7622   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
7623   // Only fold this if the inner zext has no other uses to avoid increasing
7624   // the total number of instructions.
7625   if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
7626       N0.getOperand(0).getOpcode() == ISD::SRL) {
7627     SDValue N0Op0 = N0.getOperand(0);
7628     SDValue InnerShiftAmt = N0Op0.getOperand(1);
7629
         // Both amounts must be equal and smaller than the scalar width of VT.
7630     auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7631       APInt c1 = LHS->getAPIntValue();
7632       APInt c2 = RHS->getAPIntValue();
7633       zeroExtendToMatch(c1, c2);
7634       return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
7635     };
7636     if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
7637                                   /*AllowUndefs*/ false,
7638                                   /*AllowTypeMismatch*/ true)) {
7639       SDLoc DL(N);
7640       EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
7641       SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
7642       NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
7643       AddToWorklist(NewSHL.getNode());
7644       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
7645     }
7646   }
7647
7648   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
7649   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
7650   // TODO - support non-uniform vector shift amounts.
7651   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
7652       N0->getFlags().hasExact()) {
7653     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7654       uint64_t C1 = N0C1->getZExtValue();
7655       uint64_t C2 = N1C->getZExtValue();
7656       SDLoc DL(N);
7657       if (C1 <= C2)
7658         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7659                            DAG.getConstant(C2 - C1, DL, ShiftVT));
           // C1 > C2: keep the original right-shift opcode with the residual
           // amount C1 - C2.
7660       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
7661                          DAG.getConstant(C1 - C2, DL, ShiftVT));
7662     }
7663   }
7664
7665   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
7666   //                               (and (srl x, (sub c1, c2)), MASK)
7667   // Only fold this if the inner shift has no other uses -- if it does, folding
7668   // this will increase the total number of instructions.
7669   // TODO - drop hasOneUse requirement if c1 == c2?
7670   // TODO - support non-uniform vector shift amounts.
7671   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
7672       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
7673     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
7674       if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
7675         uint64_t c1 = N0C1->getZExtValue();
7676         uint64_t c2 = N1C->getZExtValue();
             // Start with the high (OpSizeInBits - c1) bits set, then move the
             // mask by the same residual shift that is applied to the value.
7677         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
7678         SDValue Shift;
7679         if (c2 > c1) {
7680           Mask <<= c2 - c1;
7681           SDLoc DL(N);
7682           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
7683                               DAG.getConstant(c2 - c1, DL, ShiftVT));
7684         } else {
7685           Mask.lshrInPlace(c1 - c2);
7686           SDLoc DL(N);
7687           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
7688                               DAG.getConstant(c1 - c2, DL, ShiftVT));
7689         }
7690         SDLoc DL(N0);
7691         return DAG.getNode(ISD::AND, DL, VT, Shift,
7692                            DAG.getConstant(Mask, DL, VT));
7693       }
7694     }
7695   }
7696
7697   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
7698   if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
7699       isConstantOrConstantVector(N1, /* No Opaques */ true)) {
7700     SDLoc DL(N);
7701     SDValue AllBits = DAG.getAllOnesConstant(DL, VT);
7702     SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
7703     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
7704   }
7705
7706   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
7707   // fold (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
7708   // Variant of version done on multiply, except mul by a power of 2 is turned
7709   // into a shift.
7710   if ((N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR) &&
7711       N0.getNode()->hasOneUse() &&
7712       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7713       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true) &&
7714       TLI.isDesirableToCommuteWithShift(N, Level)) {
7715     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
7716     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
7717     AddToWorklist(Shl0.getNode());
7718     AddToWorklist(Shl1.getNode());
7719     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, Shl0, Shl1);
7720   }
7721
7722   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
7723   if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
7724       isConstantOrConstantVector(N1, /* No Opaques */ true) &&
7725       isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
7726     SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
         // Only use the new multiplier if c1 << c2 came back as a constant.
7727     if (isConstantOrConstantVector(Shl))
7728       return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
7729   }
7730
       // Finally, try the folds shared by all shifts with a constant amount.
7731   if (N1C && !N1C->isOpaque())
7732     if (SDValue NewSHL = visitShiftByConstant(N))
7733       return NewSHL;
7734
7735   return SDValue();
7736 }
7737
7738 SDValue DAGCombiner::visitSRA(SDNode *N) {
       // Combines for an arithmetic right-shift node N (operand 0 is the value,
       // operand 1 the shift amount). The folds below are attempted in the order
       // written; the first one that applies returns its replacement value, and
       // an empty SDValue is returned when none applies.
7739   SDValue N0 = N->getOperand(0);
7740   SDValue N1 = N->getOperand(1);
       // Give the DAG-level shift simplifier the first chance.
7741   if (SDValue V = DAG.simplifyShift(N0, N1))
7742     return V;
7743
7744   EVT VT = N0.getValueType();
7745   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7746
7747   // Arithmetic shifting an all-sign-bit value is a no-op.
7748   // fold (sra 0, x) -> 0
7749   // fold (sra -1, x) -> -1
7750   if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
7751     return N0;
7752
7753   // fold vector ops
7754   if (VT.isVector())
7755     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7756       return FoldedVOp;
7757
       // N1C is non-null only when isConstOrConstSplat recognizes the shift
       // amount; several folds below are gated on it.
7758   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7759
7760   // fold (sra c1, c2) -> c1 >>s c2
7761   // TODO - support non-uniform vector shift amounts.
7762   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7763   if (N0C && N1C && !N1C->isOpaque())
7764     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
7765
7766   if (SDValue NewSel = foldBinOpIntoSelect(N))
7767     return NewSel;
7768
7769   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
7770   // sext_inreg.
7771   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
         // Number of low bits that survive the shl/sra pair; this is the width
         // of the type being sign-extended in-register.
7772     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
7773     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
7774     if (VT.isVector())
7775       ExtVT = EVT::getVectorVT(*DAG.getContext(),
7776                                ExtVT, VT.getVectorNumElements());
7777     if ((!LegalOperations ||
7778          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
7779       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
7780                          N0.getOperand(0), DAG.getValueType(ExtVT));
7781   }
7782
7783   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
7784   // clamp (add c1, c2) to max shift.
7785   if (N0.getOpcode() == ISD::SRA) {
7786     SDLoc DL(N);
7787     EVT ShiftVT = N1.getValueType();
7788     EVT ShiftSVT = ShiftVT.getScalarType();
7789     SmallVector<SDValue, 16> ShiftValues;
7790
         // Always matches; as a side effect it records the clamped sum of each
         // pair of amounts in ShiftValues (one entry per invocation).
7791     auto SumOfShifts = [&](ConstantSDNode *LHS, ConstantSDNode *RHS) {
7792       APInt c1 = LHS->getAPIntValue();
7793       APInt c2 = RHS->getAPIntValue();
7794       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7795       APInt Sum = c1 + c2;
7796       unsigned ShiftSum =
7797           Sum.uge(OpSizeInBits) ? (OpSizeInBits - 1) : Sum.getZExtValue();
7798       ShiftValues.push_back(DAG.getConstant(ShiftSum, DL, ShiftSVT));
7799       return true;
7800     };
7801     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), SumOfShifts)) {
7802       SDValue ShiftValue;
7803       if (VT.isVector())
7804         ShiftValue = DAG.getBuildVector(ShiftVT, DL, ShiftValues);
7805       else
7806         ShiftValue = ShiftValues[0];
7807       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), ShiftValue);
7808     }
7809   }
7810
7811   // fold (sra (shl X, m), (sub result_size, n))
7812   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
7813   // result_size - n != m.
7814   // If truncate is free for the target sext(shl) is likely to result in better
7815   // code.
7816   if (N0.getOpcode() == ISD::SHL && N1C) {
7817     // Get the inner shl amount (N01C = m); N1C is the outer sra amount.
7818     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
7819     if (N01C) {
7820       LLVMContext &Ctx = *DAG.getContext();
7821       // Determine what the truncate's result bitsize and type would be.
7822       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
7823
7824       if (VT.isVector())
7825         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7826
7827       // Determine the residual right-shift amount.
7828       int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
7829
7830       // If the shift is not a no-op (in which case this should be just a sign
7831       // extend already), the truncated to type is legal, sign_extend is legal
7832       // on that type, and the truncate to that type is both legal and free,
7833       // perform the transform.
7834       if ((ShiftAmt > 0) &&
7835           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
7836           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
7837           TLI.isTruncateFree(VT, TruncVT)) {
7838         SDLoc DL(N);
7839         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
7840             getShiftAmountTy(N0.getOperand(0).getValueType()));
             // Logical shift right by the residual amount, truncate, then
             // sign-extend back to the result type.
7841         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
7842                                     N0.getOperand(0), Amt);
7843         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
7844                                     Shift);
7845         return DAG.getNode(ISD::SIGN_EXTEND, DL,
7846                            N->getValueType(0), Trunc);
7847       }
7848     }
7849   }
7850
7851   // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
7852   //   sra (add (shl X, N1C), AddC), N1C -->
7853   //   sext (add (trunc X to (width - N1C)), AddC')
7854   if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
7855       N0.getOperand(0).getOpcode() == ISD::SHL &&
7856       N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
7857     if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
7858       SDValue Shl = N0.getOperand(0);
7859       // Determine what the truncate's type would be and ask the target if that
7860       // is a free operation.
7861       LLVMContext &Ctx = *DAG.getContext();
7862       unsigned ShiftAmt = N1C->getZExtValue();
7863       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
7864       if (VT.isVector())
7865         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
7866
7867       // TODO: The simple type check probably belongs in the default hook
7868       //       implementation and/or target-specific overrides (because
7869       //       non-simple types likely require masking when legalized), but that
7870       //       restriction may conflict with other transforms.
7871       if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
7872         SDLoc DL(N);
7873         SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
             // AddC' is the add constant shifted right by the shift amount and
             // truncated to the narrow scalar width.
7874         SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
7875                              trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
7876         SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
7877         return DAG.getSExtOrTrunc(Add, DL, VT);
7878       }
7879     }
7880   }
7881
7882   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
7883   if (N1.getOpcode() == ISD::TRUNCATE &&
7884       N1.getOperand(0).getOpcode() == ISD::AND) {
7885     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
7886       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
7887   }
7888
7889   // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
7890   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
7891   //      if c1 is equal to the number of bits the trunc removes
7892   // TODO - support non-uniform vector shift amounts.
7893   if (N0.getOpcode() == ISD::TRUNCATE &&
7894       (N0.getOperand(0).getOpcode() == ISD::SRL ||
7895        N0.getOperand(0).getOpcode() == ISD::SRA) &&
7896       N0.getOperand(0).hasOneUse() &&
7897       N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
7898     SDValue N0Op0 = N0.getOperand(0);
7899     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
7900       EVT LargeVT = N0Op0.getValueType();
           // Number of bits dropped by the truncate.
7901       unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
7902       if (LargeShift->getAPIntValue() == TruncBits) {
7903         SDLoc DL(N);
7904         SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
7905                                       getShiftAmountTy(LargeVT));
7906         SDValue SRA =
7907             DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
7908         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
7909       }
7910     }
7911   }
7912
7913   // Simplify, based on bits shifted out of the LHS.
7914   // TODO - support non-uniform vector shift amounts.
       // NOTE(review): returning N itself presumably tells the combiner driver
       // that the node was updated in place -- confirm against the caller.
7915   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
7916     return SDValue(N, 0);
7917
7918   // If the sign bit is known to be zero, switch this to a SRL.
7919   if (DAG.SignBitIsZero(N0))
7920     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
7921
       // Finally, try the folds shared by all shifts with a constant amount.
7922   if (N1C && !N1C->isOpaque())
7923     if (SDValue NewSRA = visitShiftByConstant(N))
7924       return NewSRA;
7925
7926   return SDValue();
7927 }
7928
7929 SDValue DAGCombiner::visitSRL(SDNode *N) {
       // Combines for a logical right-shift node N (operand 0 is the value,
       // operand 1 the shift amount). The folds below are attempted in the order
       // written; the first one that applies returns its replacement value, and
       // an empty SDValue is returned when none applies.
7930   SDValue N0 = N->getOperand(0);
7931   SDValue N1 = N->getOperand(1);
       // Give the DAG-level shift simplifier the first chance.
7932   if (SDValue V = DAG.simplifyShift(N0, N1))
7933     return V;
7934
7935   EVT VT = N0.getValueType();
7936   unsigned OpSizeInBits = VT.getScalarSizeInBits();
7937
7938   // fold vector ops
7939   if (VT.isVector())
7940     if (SDValue FoldedVOp = SimplifyVBinOp(N))
7941       return FoldedVOp;
7942
       // N1C is non-null only when isConstOrConstSplat recognizes the shift
       // amount; most folds below are gated on it.
7943   ConstantSDNode *N1C = isConstOrConstSplat(N1);
7944
7945   // fold (srl c1, c2) -> c1 >>u c2
7946   // TODO - support non-uniform vector shift amounts.
7947   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
7948   if (N0C && N1C && !N1C->isOpaque())
7949     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
7950
7951   if (SDValue NewSel = foldBinOpIntoSelect(N))
7952     return NewSel;
7953
7954   // if (srl x, c) is known to be zero, return 0
7955   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
7956                                    APInt::getAllOnesValue(OpSizeInBits)))
7957     return DAG.getConstant(0, SDLoc(N), VT);
7958
7959   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
7960   if (N0.getOpcode() == ISD::SRL) {
         // True when the two amounts add up to at least the operand width. The
         // amounts are widened by one extra bit so the addition cannot wrap.
7961     auto MatchOutOfRange = [OpSizeInBits](ConstantSDNode *LHS,
7962                                           ConstantSDNode *RHS) {
7963       APInt c1 = LHS->getAPIntValue();
7964       APInt c2 = RHS->getAPIntValue();
7965       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7966       return (c1 + c2).uge(OpSizeInBits);
7967     };
7968     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchOutOfRange))
7969       return DAG.getConstant(0, SDLoc(N), VT);
7970
         // True when the summed amount is still below the operand width.
7971     auto MatchInRange = [OpSizeInBits](ConstantSDNode *LHS,
7972                                        ConstantSDNode *RHS) {
7973       APInt c1 = LHS->getAPIntValue();
7974       APInt c2 = RHS->getAPIntValue();
7975       zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
7976       return (c1 + c2).ult(OpSizeInBits);
7977     };
7978     if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
7979       SDLoc DL(N);
7980       EVT ShiftVT = N1.getValueType();
7981       SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
7982       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Sum);
7983     }
7984   }
7985
7986   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
7987   // TODO - support non-uniform vector shift amounts.
7988   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
7989       N0.getOperand(0).getOpcode() == ISD::SRL) {
7990     if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
7991       uint64_t c1 = N001C->getZExtValue();
7992       uint64_t c2 = N1C->getZExtValue();
7993       EVT InnerShiftVT = N0.getOperand(0).getValueType();
7994       EVT ShiftCountVT = N0.getOperand(0).getOperand(1).getValueType();
7995       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
7996       // This is only valid if the OpSizeInBits + c1 = size of inner shift.
7997       if (c1 + OpSizeInBits == InnerShiftSize) {
7998         SDLoc DL(N0);
             // Combined amount shifts out every bit of the wide value.
7999         if (c1 + c2 >= InnerShiftSize)
8000           return DAG.getConstant(0, DL, VT);
8001         return DAG.getNode(ISD::TRUNCATE, DL, VT,
8002                            DAG.getNode(ISD::SRL, DL, InnerShiftVT,
8003                                        N0.getOperand(0).getOperand(0),
8004                                        DAG.getConstant(c1 + c2, DL,
8005                                                        ShiftCountVT)));
8006       }
8007     }
8008   }
8009
8010   // fold (srl (shl x, c), c) -> (and x, cst2)
8011   // TODO - (srl (shl x, c1), c2).
8012   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
8013       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
8014     SDLoc DL(N);
         // cst2 is built as (srl all-ones, c) and queued so it can be folded.
8015     SDValue Mask =
8016         DAG.getNode(ISD::SRL, DL, VT, DAG.getAllOnesConstant(DL, VT), N1);
8017     AddToWorklist(Mask.getNode());
8018     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
8019   }
8020
8021   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
8022   // TODO - support non-uniform vector shift amounts.
8023   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
8024     // Shifting in all undef bits?
8025     EVT SmallVT = N0.getOperand(0).getValueType();
8026     unsigned BitSize = SmallVT.getScalarSizeInBits();
8027     if (N1C->getAPIntValue().uge(BitSize))
8028       return DAG.getUNDEF(VT);
8029
8030     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
8031       uint64_t ShiftAmt = N1C->getZExtValue();
8032       SDLoc DL0(N0);
8033       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
8034                                        N0.getOperand(0),
8035                           DAG.getConstant(ShiftAmt, DL0,
8036                                           getShiftAmountTy(SmallVT)));
8037       AddToWorklist(SmallShift.getNode());
           // Keep only the low (OpSizeInBits - ShiftAmt) bits of the extended
           // result.
8038       APInt Mask = APInt::getLowBitsSet(OpSizeInBits, OpSizeInBits - ShiftAmt);
8039       SDLoc DL(N);
8040       return DAG.getNode(ISD::AND, DL, VT,
8041                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
8042                          DAG.getConstant(Mask, DL, VT));
8043     }
8044   }
8045
8046   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
8047   // bit, which is unmodified by sra.
8048   if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
8049     if (N0.getOpcode() == ISD::SRA)
8050       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
8051   }
8052
8053   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
8054   if (N1C && N0.getOpcode() == ISD::CTLZ &&
8055       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
8056     KnownBits Known = DAG.computeKnownBits(N0.getOperand(0));
8057
8058     // If any of the input bits are KnownOne, then the input couldn't be all
8059     // zeros, thus the result of the srl will always be zero.
8060     if (Known.One.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
8061
8062     // If all of the bits input to the ctlz node are known to be zero, then
8063     // the result of the ctlz is "32" and the result of the shift is one.
8064     APInt UnknownBits = ~Known.Zero;
8065     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
8066
8067     // Otherwise, check to see if there is exactly one bit input to the ctlz.
8068     if (UnknownBits.isPowerOf2()) {
8069       // Okay, we know that only the single bit specified by UnknownBits
8070       // could be set on input to the CTLZ node. If this bit is set, the SRL
8071       // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
8072       // to an SRL/XOR pair, which is likely to simplify more.
8073       unsigned ShAmt = UnknownBits.countTrailingZeros();
8074       SDValue Op = N0.getOperand(0);
8075
           // Move the one possibly-set bit down to bit 0 (no shift is needed
           // when it already is bit 0).
8076       if (ShAmt) {
8077         SDLoc DL(N0);
8078         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
8079                   DAG.getConstant(ShAmt, DL,
8080                                   getShiftAmountTy(Op.getValueType())));
8081         AddToWorklist(Op.getNode());
8082       }
8083
8084       SDLoc DL(N);
8085       return DAG.getNode(ISD::XOR, DL, VT,
8086                          Op, DAG.getConstant(1, DL, VT));
8087     }
8088   }
8089
8090   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
8091   if (N1.getOpcode() == ISD::TRUNCATE &&
8092       N1.getOperand(0).getOpcode() == ISD::AND) {
8093     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
8094       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
8095   }
8096
8097   // fold operands of srl based on knowledge that the low bits are not
8098   // demanded.
8099   // TODO - support non-uniform vector shift amounts.
       // NOTE(review): returning N itself presumably tells the combiner driver
       // that the node was updated in place -- confirm against the caller.
8100   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
8101     return SDValue(N, 0);
8102
       // Try the folds shared by all shifts with a constant amount.
8103   if (N1C && !N1C->isOpaque())
8104     if (SDValue NewSRL = visitShiftByConstant(N))
8105       return NewSRL;
8106
8107   // Attempt to convert a srl of a load into a narrower zero-extending load.
8108   if (SDValue NarrowLoad = ReduceLoadWidth(N))
8109     return NarrowLoad;
8110
8111   // Here is a common situation. We want to optimize:
8112   //
8113   //   %a = ...
8114   //   %b = and i32 %a, 2
8115   //   %c = srl i32 %b, 1
8116   //   brcond i32 %c ...
8117   //
8118   // into
8119   //
8120   //   %a = ...
8121   //   %b = and %a, 2
8122   //   %c = setcc eq %b, 0
8123   //   brcond %c ...
8124   //
8125   // However, after the source operand of SRL is optimized into AND, the SRL
8126   // itself may not be optimized further. Look for it and add the BRCOND into
8127   // the worklist.
8128   if (N->hasOneUse()) {
8129     SDNode *Use = *N->use_begin();
8130     if (Use->getOpcode() == ISD::BRCOND)
8131       AddToWorklist(Use);
8132     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
8133       // Also look past the truncate.
8134       Use = *Use->use_begin();
8135       if (Use->getOpcode() == ISD::BRCOND)
8136         AddToWorklist(Use);
8137     }
8138   }
8139
8140   return SDValue();
8141 }
8142
8143 SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
8144   EVT VT = N->getValueType(0);
8145   SDValue N0 = N->getOperand(0);
8146   SDValue N1 = N->getOperand(1);
8147   SDValue N2 = N->getOperand(2);
8148   bool IsFSHL = N->getOpcode() == ISD::FSHL;
8149   unsigned BitWidth = VT.getScalarSizeInBits();
8150
8151   // fold (fshl N0, N1, 0) -> N0
8152   // fold (fshr N0, N1, 0) -> N1
8153   if (isPowerOf2_32(BitWidth))
8154     if (DAG.MaskedValueIsZero(
8155             N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
8156       return IsFSHL ? N0 : N1;
8157
8158   auto IsUndefOrZero = [](SDValue V) {
8159     return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
8160   };
8161
8162   // TODO - support non-uniform vector shift amounts.
8163   if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
8164     EVT ShAmtTy = N2.getValueType();
8165
8166     // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
8167     if (Cst->getAPIntValue().uge(BitWidth)) {
8168       uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
8169       return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
8170                          DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
8171     }
8172
8173     unsigned ShAmt = Cst->getZExtValue();
8174     if (ShAmt == 0)
8175       return IsFSHL ? N0 : N1;
8176
8177     // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
8178     // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
8179     // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
8180     // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
8181     if (IsUndefOrZero(N0))
8182       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
8183                          DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
8184                                          SDLoc(N), ShAmtTy));
8185     if (IsUndefOrZero(N1))
8186       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
8187                          DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
8188                                          SDLoc(N), ShAmtTy));
8189   }
8190
8191   // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
8192   // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
8193   // iff We know the shift amount is in range.
8194   // TODO: when is it worth doing SUB(BW, N2) as well?
8195   if (isPowerOf2_32(BitWidth)) {
8196     APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
8197     if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8198       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
8199     if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
8200       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
8201   }
8202
8203   // fold (fshl N0, N0, N2) -> (rotl N0, N2)
8204   // fold (fshr N0, N0, N2) -> (rotr N0, N2)
8205   // TODO: Investigate flipping this rotate if only one is legal, if funnel shift
8206   // is legal as well we might be better off avoiding non-constant (BW - N2).
8207   unsigned RotOpc = IsFSHL ? ISD::ROTL : ISD::ROTR;
8208   if (N0 == N1 && hasOperation(RotOpc, VT))
8209     return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
8210
8211   // Simplify, based on bits shifted out of N0/N1.
8212   if (SimplifyDemandedBits(SDValue(N, 0)))
8213     return SDValue(N, 0);
8214
8215   return SDValue();
8216 }
8217
8218 SDValue DAGCombiner::visitABS(SDNode *N) {
8219   SDValue N0 = N->getOperand(0);
8220   EVT VT = N->getValueType(0);
8221
8222   // fold (abs c1) -> c2
8223   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8224     return DAG.getNode(ISD::ABS, SDLoc(N), VT, N0);
8225   // fold (abs (abs x)) -> (abs x)
8226   if (N0.getOpcode() == ISD::ABS)
8227     return N0;
8228   // fold (abs x) -> x iff not-negative
8229   if (DAG.SignBitIsZero(N0))
8230     return N0;
8231   return SDValue();
8232 }
8233
8234 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
8235   SDValue N0 = N->getOperand(0);
8236   EVT VT = N->getValueType(0);
8237
8238   // fold (bswap c1) -> c2
8239   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8240     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
8241   // fold (bswap (bswap x)) -> x
8242   if (N0.getOpcode() == ISD::BSWAP)
8243     return N0->getOperand(0);
8244   return SDValue();
8245 }
8246
8247 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
8248   SDValue N0 = N->getOperand(0);
8249   EVT VT = N->getValueType(0);
8250
8251   // fold (bitreverse c1) -> c2
8252   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8253     return DAG.getNode(ISD::BITREVERSE, SDLoc(N), VT, N0);
8254   // fold (bitreverse (bitreverse x)) -> x
8255   if (N0.getOpcode() == ISD::BITREVERSE)
8256     return N0.getOperand(0);
8257   return SDValue();
8258 }
8259
8260 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
8261   SDValue N0 = N->getOperand(0);
8262   EVT VT = N->getValueType(0);
8263
8264   // fold (ctlz c1) -> c2
8265   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8266     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
8267
8268   // If the value is known never to be zero, switch to the undef version.
8269   if (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ_ZERO_UNDEF, VT)) {
8270     if (DAG.isKnownNeverZero(N0))
8271       return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8272   }
8273
8274   return SDValue();
8275 }
8276
8277 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
8278   SDValue N0 = N->getOperand(0);
8279   EVT VT = N->getValueType(0);
8280
8281   // fold (ctlz_zero_undef c1) -> c2
8282   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8283     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8284   return SDValue();
8285 }
8286
8287 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
8288   SDValue N0 = N->getOperand(0);
8289   EVT VT = N->getValueType(0);
8290
8291   // fold (cttz c1) -> c2
8292   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8293     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
8294
8295   // If the value is known never to be zero, switch to the undef version.
8296   if (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ_ZERO_UNDEF, VT)) {
8297     if (DAG.isKnownNeverZero(N0))
8298       return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8299   }
8300
8301   return SDValue();
8302 }
8303
8304 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
8305   SDValue N0 = N->getOperand(0);
8306   EVT VT = N->getValueType(0);
8307
8308   // fold (cttz_zero_undef c1) -> c2
8309   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8310     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
8311   return SDValue();
8312 }
8313
8314 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
8315   SDValue N0 = N->getOperand(0);
8316   EVT VT = N->getValueType(0);
8317
8318   // fold (ctpop c1) -> c2
8319   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
8320     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
8321   return SDValue();
8322 }
8323
8324 // FIXME: This should be checking for no signed zeros on individual operands, as
8325 // well as no nans.
8326 static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
8327                                          SDValue RHS,
8328                                          const TargetLowering &TLI) {
8329   const TargetOptions &Options = DAG.getTarget().Options;
8330   EVT VT = LHS.getValueType();
8331
8332   return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
8333          TLI.isProfitableToCombineMinNumMaxNum(VT) &&
8334          DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
8335 }
8336
8337 /// Generate Min/Max node
8338 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
8339                                    SDValue RHS, SDValue True, SDValue False,
8340                                    ISD::CondCode CC, const TargetLowering &TLI,
8341                                    SelectionDAG &DAG) {
8342   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
8343     return SDValue();
8344
8345   EVT TransformVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
8346   switch (CC) {
8347   case ISD::SETOLT:
8348   case ISD::SETOLE:
8349   case ISD::SETLT:
8350   case ISD::SETLE:
8351   case ISD::SETULT:
8352   case ISD::SETULE: {
8353     // Since it's known never nan to get here already, either fminnum or
8354     // fminnum_ieee are OK. Try the ieee version first, since it's fminnum is
8355     // expanded in terms of it.
8356     unsigned IEEEOpcode = (LHS == True) ? ISD::FMINNUM_IEEE : ISD::FMAXNUM_IEEE;
8357     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8358       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8359
8360     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
8361     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8362       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8363     return SDValue();
8364   }
8365   case ISD::SETOGT:
8366   case ISD::SETOGE:
8367   case ISD::SETGT:
8368   case ISD::SETGE:
8369   case ISD::SETUGT:
8370   case ISD::SETUGE: {
8371     unsigned IEEEOpcode = (LHS == True) ? ISD::FMAXNUM_IEEE : ISD::FMINNUM_IEEE;
8372     if (TLI.isOperationLegalOrCustom(IEEEOpcode, VT))
8373       return DAG.getNode(IEEEOpcode, DL, VT, LHS, RHS);
8374
8375     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
8376     if (TLI.isOperationLegalOrCustom(Opcode, TransformVT))
8377       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
8378     return SDValue();
8379   }
8380   default:
8381     return SDValue();
8382   }
8383 }
8384
8385 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
8386   SDValue Cond = N->getOperand(0);
8387   SDValue N1 = N->getOperand(1);
8388   SDValue N2 = N->getOperand(2);
8389   EVT VT = N->getValueType(0);
8390   EVT CondVT = Cond.getValueType();
8391   SDLoc DL(N);
8392
8393   if (!VT.isInteger())
8394     return SDValue();
8395
8396   auto *C1 = dyn_cast<ConstantSDNode>(N1);
8397   auto *C2 = dyn_cast<ConstantSDNode>(N2);
8398   if (!C1 || !C2)
8399     return SDValue();
8400
8401   // Only do this before legalization to avoid conflicting with target-specific
8402   // transforms in the other direction (create a select from a zext/sext). There
8403   // is also a target-independent combine here in DAGCombiner in the other
8404   // direction for (select Cond, -1, 0) when the condition is not i1.
8405   if (CondVT == MVT::i1 && !LegalOperations) {
8406     if (C1->isNullValue() && C2->isOne()) {
8407       // select Cond, 0, 1 --> zext (!Cond)
8408       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8409       if (VT != MVT::i1)
8410         NotCond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, NotCond);
8411       return NotCond;
8412     }
8413     if (C1->isNullValue() && C2->isAllOnesValue()) {
8414       // select Cond, 0, -1 --> sext (!Cond)
8415       SDValue NotCond = DAG.getNOT(DL, Cond, MVT::i1);
8416       if (VT != MVT::i1)
8417         NotCond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, NotCond);
8418       return NotCond;
8419     }
8420     if (C1->isOne() && C2->isNullValue()) {
8421       // select Cond, 1, 0 --> zext (Cond)
8422       if (VT != MVT::i1)
8423         Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8424       return Cond;
8425     }
8426     if (C1->isAllOnesValue() && C2->isNullValue()) {
8427       // select Cond, -1, 0 --> sext (Cond)
8428       if (VT != MVT::i1)
8429         Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8430       return Cond;
8431     }
8432
8433     // For any constants that differ by 1, we can transform the select into an
8434     // extend and add. Use a target hook because some targets may prefer to
8435     // transform in the other direction.
8436     if (TLI.convertSelectOfConstantsToMath(VT)) {
8437       if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
8438         // select Cond, C1, C1-1 --> add (zext Cond), C1-1
8439         if (VT != MVT::i1)
8440           Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
8441         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8442       }
8443       if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
8444         // select Cond, C1, C1+1 --> add (sext Cond), C1+1
8445         if (VT != MVT::i1)
8446           Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
8447         return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
8448       }
8449     }
8450
8451     return SDValue();
8452   }
8453
8454   // fold (select Cond, 0, 1) -> (xor Cond, 1)
8455   // We can't do this reliably if integer based booleans have different contents
8456   // to floating point based booleans. This is because we can't tell whether we
8457   // have an integer-based boolean or a floating-point-based boolean unless we
8458   // can find the SETCC that produced it and inspect its operands. This is
8459   // fairly easy if C is the SETCC node, but it can potentially be
8460   // undiscoverable (or not reasonably discoverable). For example, it could be
8461   // in another basic block or it could require searching a complicated
8462   // expression.
8463   if (CondVT.isInteger() &&
8464       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/true) ==
8465           TargetLowering::ZeroOrOneBooleanContent &&
8466       TLI.getBooleanContents(/*isVec*/false, /*isFloat*/false) ==
8467           TargetLowering::ZeroOrOneBooleanContent &&
8468       C1->isNullValue() && C2->isOne()) {
8469     SDValue NotCond =
8470         DAG.getNode(ISD::XOR, DL, CondVT, Cond, DAG.getConstant(1, DL, CondVT));
8471     if (VT.bitsEq(CondVT))
8472       return NotCond;
8473     return DAG.getZExtOrTrunc(NotCond, DL, VT);
8474   }
8475
8476   return SDValue();
8477 }
8478
8479 SDValue DAGCombiner::visitSELECT(SDNode *N) {
8480   SDValue N0 = N->getOperand(0);
8481   SDValue N1 = N->getOperand(1);
8482   SDValue N2 = N->getOperand(2);
8483   EVT VT = N->getValueType(0);
8484   EVT VT0 = N0.getValueType();
8485   SDLoc DL(N);
8486   SDNodeFlags Flags = N->getFlags();
8487
8488   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8489     return V;
8490
8491   // fold (select X, X, Y) -> (or X, Y)
8492   // fold (select X, 1, Y) -> (or C, Y)
8493   if (VT == VT0 && VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
8494     return DAG.getNode(ISD::OR, DL, VT, N0, N2);
8495
8496   if (SDValue V = foldSelectOfConstants(N))
8497     return V;
8498
8499   // fold (select C, 0, X) -> (and (not C), X)
8500   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
8501     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8502     AddToWorklist(NOTNode.getNode());
8503     return DAG.getNode(ISD::AND, DL, VT, NOTNode, N2);
8504   }
8505   // fold (select C, X, 1) -> (or (not C), X)
8506   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
8507     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
8508     AddToWorklist(NOTNode.getNode());
8509     return DAG.getNode(ISD::OR, DL, VT, NOTNode, N1);
8510   }
8511   // fold (select X, Y, X) -> (and X, Y)
8512   // fold (select X, Y, 0) -> (and X, Y)
8513   if (VT == VT0 && VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
8514     return DAG.getNode(ISD::AND, DL, VT, N0, N1);
8515
8516   // If we can fold this based on the true/false value, do so.
8517   if (SimplifySelectOps(N, N1, N2))
8518     return SDValue(N, 0); // Don't revisit N.
8519
8520   if (VT0 == MVT::i1) {
8521     // The code in this block deals with the following 2 equivalences:
8522     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
8523     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
8524     // The target can specify its preferred form with the
8525     // shouldNormalizeToSelectSequence() callback. However we always transform
8526     // to the right anyway if we find the inner select exists in the DAG anyway
8527     // and we always transform to the left side if we know that we can further
8528     // optimize the combination of the conditions.
8529     bool normalizeToSequence =
8530         TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
8531     // select (and Cond0, Cond1), X, Y
8532     //   -> select Cond0, (select Cond1, X, Y), Y
8533     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
8534       SDValue Cond0 = N0->getOperand(0);
8535       SDValue Cond1 = N0->getOperand(1);
8536       SDValue InnerSelect =
8537           DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
8538       if (normalizeToSequence || !InnerSelect.use_empty())
8539         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
8540                            InnerSelect, N2, Flags);
8541       // Cleanup on failure.
8542       if (InnerSelect.use_empty())
8543         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8544     }
8545     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
8546     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
8547       SDValue Cond0 = N0->getOperand(0);
8548       SDValue Cond1 = N0->getOperand(1);
8549       SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
8550                                         Cond1, N1, N2, Flags);
8551       if (normalizeToSequence || !InnerSelect.use_empty())
8552         return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
8553                            InnerSelect, Flags);
8554       // Cleanup on failure.
8555       if (InnerSelect.use_empty())
8556         recursivelyDeleteUnusedNodes(InnerSelect.getNode());
8557     }
8558
8559     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
8560     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
8561       SDValue N1_0 = N1->getOperand(0);
8562       SDValue N1_1 = N1->getOperand(1);
8563       SDValue N1_2 = N1->getOperand(2);
8564       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
8565         // Create the actual and node if we can generate good code for it.
8566         if (!normalizeToSequence) {
8567           SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
8568           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
8569                              N2, Flags);
8570         }
8571         // Otherwise see if we can optimize the "and" to a better pattern.
8572         if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
8573           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
8574                              N2, Flags);
8575         }
8576       }
8577     }
8578     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
8579     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
8580       SDValue N2_0 = N2->getOperand(0);
8581       SDValue N2_1 = N2->getOperand(1);
8582       SDValue N2_2 = N2->getOperand(2);
8583       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
8584         // Create the actual or node if we can generate good code for it.
8585         if (!normalizeToSequence) {
8586           SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
8587           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
8588                              N2_2, Flags);
8589         }
8590         // Otherwise see if we can optimize to a better pattern.
8591         if (SDValue Combined = visitORLike(N0, N2_0, N))
8592           return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
8593                              N2_2, Flags);
8594       }
8595     }
8596   }
8597
8598   // select (not Cond), N1, N2 -> select Cond, N2, N1
8599   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
8600     SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
8601     SelectOp->setFlags(Flags);
8602     return SelectOp;
8603   }
8604
8605   // Fold selects based on a setcc into other things, such as min/max/abs.
8606   if (N0.getOpcode() == ISD::SETCC) {
8607     SDValue Cond0 = N0.getOperand(0), Cond1 = N0.getOperand(1);
8608     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8609
8610     // select (fcmp lt x, y), x, y -> fminnum x, y
8611     // select (fcmp gt x, y), x, y -> fmaxnum x, y
8612     //
8613     // This is OK if we don't care what happens if either operand is a NaN.
8614     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
8615       if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
8616                                                 CC, TLI, DAG))
8617         return FMinMax;
8618
8619     // Use 'unsigned add with overflow' to optimize an unsigned saturating add.
8620     // This is conservatively limited to pre-legal-operations to give targets
8621     // a chance to reverse the transform if they want to do that. Also, it is
8622     // unlikely that the pattern would be formed late, so it's probably not
8623     // worth going through the other checks.
8624     if (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::UADDO, VT) &&
8625         CC == ISD::SETUGT && N0.hasOneUse() && isAllOnesConstant(N1) &&
8626         N2.getOpcode() == ISD::ADD && Cond0 == N2.getOperand(0)) {
8627       auto *C = dyn_cast<ConstantSDNode>(N2.getOperand(1));
8628       auto *NotC = dyn_cast<ConstantSDNode>(Cond1);
8629       if (C && NotC && C->getAPIntValue() == ~NotC->getAPIntValue()) {
8630         // select (setcc Cond0, ~C, ugt), -1, (add Cond0, C) -->
8631         // uaddo Cond0, C; select uaddo.1, -1, uaddo.0
8632         //
8633         // The IR equivalent of this transform would have this form:
8634         //   %a = add %x, C
8635         //   %c = icmp ugt %x, ~C
8636         //   %r = select %c, -1, %a
8637         //   =>
8638         //   %u = call {iN,i1} llvm.uadd.with.overflow(%x, C)
8639         //   %u0 = extractvalue %u, 0
8640         //   %u1 = extractvalue %u, 1
8641         //   %r = select %u1, -1, %u0
8642         SDVTList VTs = DAG.getVTList(VT, VT0);
8643         SDValue UAO = DAG.getNode(ISD::UADDO, DL, VTs, Cond0, N2.getOperand(1));
8644         return DAG.getSelect(DL, VT, UAO.getValue(1), N1, UAO.getValue(0));
8645       }
8646     }
8647
8648     if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
8649         (!LegalOperations &&
8650          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
8651       // Any flags available in a select/setcc fold will be on the setcc as they
8652       // migrated from fcmp
8653       Flags = N0.getNode()->getFlags();
8654       SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
8655                                        N2, N0.getOperand(2));
8656       SelectNode->setFlags(Flags);
8657       return SelectNode;
8658     }
8659
8660     return SimplifySelect(DL, N0, N1, N2);
8661   }
8662
8663   return SDValue();
8664 }
8665
8666 // This function assumes all the vselect's arguments are CONCAT_VECTOR
8667 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
8668 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
8669   SDLoc DL(N);
8670   SDValue Cond = N->getOperand(0);
8671   SDValue LHS = N->getOperand(1);
8672   SDValue RHS = N->getOperand(2);
8673   EVT VT = N->getValueType(0);
8674   int NumElems = VT.getVectorNumElements();
8675   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
8676          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
8677          Cond.getOpcode() == ISD::BUILD_VECTOR);
8678
8679   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
8680   // binary ones here.
8681   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
8682     return SDValue();
8683
8684   // We're sure we have an even number of elements due to the
8685   // concat_vectors we have as arguments to vselect.
8686   // Skip BV elements until we find one that's not an UNDEF
8687   // After we find an UNDEF element, keep looping until we get to half the
8688   // length of the BV and see if all the non-undef nodes are the same.
8689   ConstantSDNode *BottomHalf = nullptr;
8690   for (int i = 0; i < NumElems / 2; ++i) {
8691     if (Cond->getOperand(i)->isUndef())
8692       continue;
8693
8694     if (BottomHalf == nullptr)
8695       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8696     else if (Cond->getOperand(i).getNode() != BottomHalf)
8697       return SDValue();
8698   }
8699
8700   // Do the same for the second half of the BuildVector
8701   ConstantSDNode *TopHalf = nullptr;
8702   for (int i = NumElems / 2; i < NumElems; ++i) {
8703     if (Cond->getOperand(i)->isUndef())
8704       continue;
8705
8706     if (TopHalf == nullptr)
8707       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
8708     else if (Cond->getOperand(i).getNode() != TopHalf)
8709       return SDValue();
8710   }
8711
8712   assert(TopHalf && BottomHalf &&
8713          "One half of the selector was all UNDEFs and the other was all the "
8714          "same value. This should have been addressed before this function.");
8715   return DAG.getNode(
8716       ISD::CONCAT_VECTORS, DL, VT,
8717       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
8718       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
8719 }
8720
8721 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
8722   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
8723   SDValue Mask = MSC->getMask();
8724   SDValue Chain = MSC->getChain();
8725   SDLoc DL(N);
8726
8727   // Zap scatters with a zero mask.
8728   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8729     return Chain;
8730
8731   return SDValue();
8732 }
8733
8734 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
8735   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
8736   SDValue Mask = MST->getMask();
8737   SDValue Chain = MST->getChain();
8738   SDLoc DL(N);
8739
8740   // Zap masked stores with a zero mask.
8741   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8742     return Chain;
8743
8744   return SDValue();
8745 }
8746
8747 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
8748   MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
8749   SDValue Mask = MGT->getMask();
8750   SDLoc DL(N);
8751
8752   // Zap gathers with a zero mask.
8753   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8754     return CombineTo(N, MGT->getPassThru(), MGT->getChain());
8755
8756   return SDValue();
8757 }
8758
8759 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
8760   MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
8761   SDValue Mask = MLD->getMask();
8762   SDLoc DL(N);
8763
8764   // Zap masked loads with a zero mask.
8765   if (ISD::isBuildVectorAllZeros(Mask.getNode()))
8766     return CombineTo(N, MLD->getPassThru(), MLD->getChain());
8767
8768   return SDValue();
8769 }
8770
8771 /// A vector select of 2 constant vectors can be simplified to math/logic to
8772 /// avoid a variable select instruction and possibly avoid constant loads.
8773 SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
8774   SDValue Cond = N->getOperand(0);
8775   SDValue N1 = N->getOperand(1);
8776   SDValue N2 = N->getOperand(2);
8777   EVT VT = N->getValueType(0);
8778   if (!Cond.hasOneUse() || Cond.getScalarValueSizeInBits() != 1 ||
8779       !TLI.convertSelectOfConstantsToMath(VT) ||
8780       !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()) ||
8781       !ISD::isBuildVectorOfConstantSDNodes(N2.getNode()))
8782     return SDValue();
8783
8784   // Check if we can use the condition value to increment/decrement a single
8785   // constant value. This simplifies a select to an add and removes a constant
8786   // load/materialization from the general case.
8787   bool AllAddOne = true;
8788   bool AllSubOne = true;
8789   unsigned Elts = VT.getVectorNumElements();
8790   for (unsigned i = 0; i != Elts; ++i) {
8791     SDValue N1Elt = N1.getOperand(i);
8792     SDValue N2Elt = N2.getOperand(i);
8793     if (N1Elt.isUndef() || N2Elt.isUndef())
8794       continue;
8795
8796     const APInt &C1 = cast<ConstantSDNode>(N1Elt)->getAPIntValue();
8797     const APInt &C2 = cast<ConstantSDNode>(N2Elt)->getAPIntValue();
8798     if (C1 != C2 + 1)
8799       AllAddOne = false;
8800     if (C1 != C2 - 1)
8801       AllSubOne = false;
8802   }
8803
8804   // Further simplifications for the extra-special cases where the constants are
8805   // all 0 or all -1 should be implemented as folds of these patterns.
8806   SDLoc DL(N);
8807   if (AllAddOne || AllSubOne) {
8808     // vselect <N x i1> Cond, C+1, C --> add (zext Cond), C
8809     // vselect <N x i1> Cond, C-1, C --> add (sext Cond), C
8810     auto ExtendOpcode = AllAddOne ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
8811     SDValue ExtendedCond = DAG.getNode(ExtendOpcode, DL, VT, Cond);
8812     return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
8813   }
8814
8815   // The general case for select-of-constants:
8816   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
8817   // ...but that only makes sense if a vselect is slower than 2 logic ops, so
8818   // leave that to a machine-specific pass.
8819   return SDValue();
8820 }
8821
8822 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
8823   SDValue N0 = N->getOperand(0);
8824   SDValue N1 = N->getOperand(1);
8825   SDValue N2 = N->getOperand(2);
8826   EVT VT = N->getValueType(0);
8827   SDLoc DL(N);
8828
8829   if (SDValue V = DAG.simplifySelect(N0, N1, N2))
8830     return V;
8831
8832   // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
8833   if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
8834     return DAG.getSelect(DL, VT, F, N2, N1);
8835
8836   // Canonicalize integer abs.
8837   // vselect (setg[te] X,  0),  X, -X ->
8838   // vselect (setgt    X, -1),  X, -X ->
8839   // vselect (setl[te] X,  0), -X,  X ->
8840   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
8841   if (N0.getOpcode() == ISD::SETCC) {
8842     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
8843     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
8844     bool isAbs = false;
8845     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
8846
8847     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
8848          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
8849         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
8850       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
8851     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
8852              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
8853       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
8854
8855     if (isAbs) {
8856       if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
8857         return DAG.getNode(ISD::ABS, DL, VT, LHS);
8858
8859       SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
8860                                   DAG.getConstant(VT.getScalarSizeInBits() - 1,
8861                                                   DL, getShiftAmountTy(VT)));
8862       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
8863       AddToWorklist(Shift.getNode());
8864       AddToWorklist(Add.getNode());
8865       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
8866     }
8867
8868     // vselect x, y (fcmp lt x, y) -> fminnum x, y
8869     // vselect x, y (fcmp gt x, y) -> fmaxnum x, y
8870     //
8871     // This is OK if we don't care about what happens if either operand is a
8872     // NaN.
8873     //
8874     if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
8875       if (SDValue FMinMax =
8876               combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
8877         return FMinMax;
8878     }
8879
8880     // If this select has a condition (setcc) with narrower operands than the
8881     // select, try to widen the compare to match the select width.
8882     // TODO: This should be extended to handle any constant.
8883     // TODO: This could be extended to handle non-loading patterns, but that
8884     //       requires thorough testing to avoid regressions.
8885     if (isNullOrNullSplat(RHS)) {
8886       EVT NarrowVT = LHS.getValueType();
8887       EVT WideVT = N1.getValueType().changeVectorElementTypeToInteger();
8888       EVT SetCCVT = getSetCCResultType(LHS.getValueType());
8889       unsigned SetCCWidth = SetCCVT.getScalarSizeInBits();
8890       unsigned WideWidth = WideVT.getScalarSizeInBits();
8891       bool IsSigned = isSignedIntSetCC(CC);
8892       auto LoadExtOpcode = IsSigned ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
8893       if (LHS.getOpcode() == ISD::LOAD && LHS.hasOneUse() &&
8894           SetCCWidth != 1 && SetCCWidth < WideWidth &&
8895           TLI.isLoadExtLegalOrCustom(LoadExtOpcode, WideVT, NarrowVT) &&
8896           TLI.isOperationLegalOrCustom(ISD::SETCC, WideVT)) {
8897         // Both compare operands can be widened for free. The LHS can use an
8898         // extended load, and the RHS is a constant:
8899         //   vselect (ext (setcc load(X), C)), N1, N2 -->
8900         //   vselect (setcc extload(X), C'), N1, N2
8901         auto ExtOpcode = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
8902         SDValue WideLHS = DAG.getNode(ExtOpcode, DL, WideVT, LHS);
8903         SDValue WideRHS = DAG.getNode(ExtOpcode, DL, WideVT, RHS);
8904         EVT WideSetCCVT = getSetCCResultType(WideVT);
8905         SDValue WideSetCC = DAG.getSetCC(DL, WideSetCCVT, WideLHS, WideRHS, CC);
8906         return DAG.getSelect(DL, N1.getValueType(), WideSetCC, N1, N2);
8907       }
8908     }
8909   }
8910
8911   if (SimplifySelectOps(N, N1, N2))
8912     return SDValue(N, 0);  // Don't revisit N.
8913
8914   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
8915   if (ISD::isBuildVectorAllOnes(N0.getNode()))
8916     return N1;
8917   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
8918   if (ISD::isBuildVectorAllZeros(N0.getNode()))
8919     return N2;
8920
8921   // The ConvertSelectToConcatVector function is assuming both the above
8922   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
8923   // and addressed.
8924   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
8925       N2.getOpcode() == ISD::CONCAT_VECTORS &&
8926       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
8927     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
8928       return CV;
8929   }
8930
8931   if (SDValue V = foldVSelectOfConstants(N))
8932     return V;
8933
8934   return SDValue();
8935 }
8936
8937 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
8938   SDValue N0 = N->getOperand(0);
8939   SDValue N1 = N->getOperand(1);
8940   SDValue N2 = N->getOperand(2);
8941   SDValue N3 = N->getOperand(3);
8942   SDValue N4 = N->getOperand(4);
8943   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
8944
8945   // fold select_cc lhs, rhs, x, x, cc -> x
8946   if (N2 == N3)
8947     return N2;
8948
8949   // Determine if the condition we're dealing with is constant
8950   if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
8951                                   CC, SDLoc(N), false)) {
8952     AddToWorklist(SCC.getNode());
8953
8954     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
8955       if (!SCCC->isNullValue())
8956         return N2;    // cond always true -> true val
8957       else
8958         return N3;    // cond always false -> false val
8959     } else if (SCC->isUndef()) {
8960       // When the condition is UNDEF, just return the first operand. This is
8961       // coherent the DAG creation, no setcc node is created in this case
8962       return N2;
8963     } else if (SCC.getOpcode() == ISD::SETCC) {
8964       // Fold to a simpler select_cc
8965       SDValue SelectOp = DAG.getNode(
8966           ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
8967           SCC.getOperand(1), N2, N3, SCC.getOperand(2));
8968       SelectOp->setFlags(SCC->getFlags());
8969       return SelectOp;
8970     }
8971   }
8972
8973   // If we can fold this based on the true/false value, do so.
8974   if (SimplifySelectOps(N, N2, N3))
8975     return SDValue(N, 0);  // Don't revisit N.
8976
8977   // fold select_cc into other things, such as min/max/abs
8978   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
8979 }
8980
8981 SDValue DAGCombiner::visitSETCC(SDNode *N) {
8982   // setcc is very commonly used as an argument to brcond. This pattern
8983   // also lend itself to numerous combines and, as a result, it is desired
8984   // we keep the argument to a brcond as a setcc as much as possible.
8985   bool PreferSetCC =
8986       N->hasOneUse() && N->use_begin()->getOpcode() == ISD::BRCOND;
8987
8988   SDValue Combined = SimplifySetCC(
8989       N->getValueType(0), N->getOperand(0), N->getOperand(1),
8990       cast<CondCodeSDNode>(N->getOperand(2))->get(), SDLoc(N), !PreferSetCC);
8991
8992   if (!Combined)
8993     return SDValue();
8994
8995   // If we prefer to have a setcc, and we don't, we'll try our best to
8996   // recreate one using rebuildSetCC.
8997   if (PreferSetCC && Combined.getOpcode() != ISD::SETCC) {
8998     SDValue NewSetCC = rebuildSetCC(Combined);
8999
9000     // We don't have anything interesting to combine to.
9001     if (NewSetCC.getNode() == N)
9002       return SDValue();
9003
9004     if (NewSetCC)
9005       return NewSetCC;
9006   }
9007
9008   return Combined;
9009 }
9010
9011 SDValue DAGCombiner::visitSETCCCARRY(SDNode *N) {
9012   SDValue LHS = N->getOperand(0);
9013   SDValue RHS = N->getOperand(1);
9014   SDValue Carry = N->getOperand(2);
9015   SDValue Cond = N->getOperand(3);
9016
9017   // If Carry is false, fold to a regular SETCC.
9018   if (isNullConstant(Carry))
9019     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
9020
9021   return SDValue();
9022 }
9023
9024 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
9025 /// a build_vector of constants.
9026 /// This function is called by the DAGCombiner when visiting sext/zext/aext
9027 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
9028 /// Vector extends are not folded if operations are legal; this is to
9029 /// avoid introducing illegal build_vector dag nodes.
9030 static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
9031                                          SelectionDAG &DAG, bool LegalTypes) {
9032   unsigned Opcode = N->getOpcode();
9033   SDValue N0 = N->getOperand(0);
9034   EVT VT = N->getValueType(0);
9035   SDLoc DL(N);
9036
9037   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
9038          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
9039          Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
9040          && "Expected EXTEND dag node in input!");
9041
9042   // fold (sext c1) -> c1
9043   // fold (zext c1) -> c1
9044   // fold (aext c1) -> c1
9045   if (isa<ConstantSDNode>(N0))
9046     return DAG.getNode(Opcode, DL, VT, N0);
9047
9048   // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9049   // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
9050   // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
9051   if (N0->getOpcode() == ISD::SELECT) {
9052     SDValue Op1 = N0->getOperand(1);
9053     SDValue Op2 = N0->getOperand(2);
9054     if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
9055         (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
9056       // For any_extend, choose sign extension of the constants to allow a
9057       // possible further transform to sign_extend_inreg.i.e.
9058       //
9059       // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
9060       // t2: i64 = any_extend t1
9061       // -->
9062       // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
9063       // -->
9064       // t4: i64 = sign_extend_inreg t3
9065       unsigned FoldOpc = Opcode;
9066       if (FoldOpc == ISD::ANY_EXTEND)
9067         FoldOpc = ISD::SIGN_EXTEND;
9068       return DAG.getSelect(DL, VT, N0->getOperand(0),
9069                            DAG.getNode(FoldOpc, DL, VT, Op1),
9070                            DAG.getNode(FoldOpc, DL, VT, Op2));
9071     }
9072   }
9073
9074   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
9075   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
9076   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
9077   EVT SVT = VT.getScalarType();
9078   if (!(VT.isVector() && (!LegalTypes || TLI.isTypeLegal(SVT)) &&
9079       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
9080     return SDValue();
9081
9082   // We can fold this node into a build_vector.
9083   unsigned VTBits = SVT.getSizeInBits();
9084   unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
9085   SmallVector<SDValue, 8> Elts;
9086   unsigned NumElts = VT.getVectorNumElements();
9087
9088   // For zero-extensions, UNDEF elements still guarantee to have the upper
9089   // bits set to zero.
9090   bool IsZext =
9091       Opcode == ISD::ZERO_EXTEND || Opcode == ISD::ZERO_EXTEND_VECTOR_INREG;
9092
9093   for (unsigned i = 0; i != NumElts; ++i) {
9094     SDValue Op = N0.getOperand(i);
9095     if (Op.isUndef()) {
9096       Elts.push_back(IsZext ? DAG.getConstant(0, DL, SVT) : DAG.getUNDEF(SVT));
9097       continue;
9098     }
9099
9100     SDLoc DL(Op);
9101     // Get the constant value and if needed trunc it to the size of the type.
9102     // Nodes like build_vector might have constants wider than the scalar type.
9103     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
9104     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
9105       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
9106     else
9107       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
9108   }
9109
9110   return DAG.getBuildVector(VT, DL, Elts);
9111 }
9112
9113 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
9114 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
9115 // transformation. Returns true if extension are possible and the above
9116 // mentioned transformation is profitable.
9117 static bool ExtendUsesToFormExtLoad(EVT VT, SDNode *N, SDValue N0,
9118                                     unsigned ExtOpc,
9119                                     SmallVectorImpl<SDNode *> &ExtendNodes,
9120                                     const TargetLowering &TLI) {
9121   bool HasCopyToRegUses = false;
9122   bool isTruncFree = TLI.isTruncateFree(VT, N0.getValueType());
9123   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
9124                             UE = N0.getNode()->use_end();
9125        UI != UE; ++UI) {
9126     SDNode *User = *UI;
9127     if (User == N)
9128       continue;
9129     if (UI.getUse().getResNo() != N0.getResNo())
9130       continue;
9131     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
9132     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
9133       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
9134       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
9135         // Sign bits will be lost after a zext.
9136         return false;
9137       bool Add = false;
9138       for (unsigned i = 0; i != 2; ++i) {
9139         SDValue UseOp = User->getOperand(i);
9140         if (UseOp == N0)
9141           continue;
9142         if (!isa<ConstantSDNode>(UseOp))
9143           return false;
9144         Add = true;
9145       }
9146       if (Add)
9147         ExtendNodes.push_back(User);
9148       continue;
9149     }
9150     // If truncates aren't free and there are users we can't
9151     // extend, it isn't worthwhile.
9152     if (!isTruncFree)
9153       return false;
9154     // Remember if this value is live-out.
9155     if (User->getOpcode() == ISD::CopyToReg)
9156       HasCopyToRegUses = true;
9157   }
9158
9159   if (HasCopyToRegUses) {
9160     bool BothLiveOut = false;
9161     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
9162          UI != UE; ++UI) {
9163       SDUse &Use = UI.getUse();
9164       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
9165         BothLiveOut = true;
9166         break;
9167       }
9168     }
9169     if (BothLiveOut)
9170       // Both unextended and extended values are live out. There had better be
9171       // a good reason for the transformation.
9172       return ExtendNodes.size();
9173   }
9174   return true;
9175 }
9176
9177 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
9178                                   SDValue OrigLoad, SDValue ExtLoad,
9179                                   ISD::NodeType ExtType) {
9180   // Extend SetCC uses if necessary.
9181   SDLoc DL(ExtLoad);
9182   for (SDNode *SetCC : SetCCs) {
9183     SmallVector<SDValue, 4> Ops;
9184
9185     for (unsigned j = 0; j != 2; ++j) {
9186       SDValue SOp = SetCC->getOperand(j);
9187       if (SOp == OrigLoad)
9188         Ops.push_back(ExtLoad);
9189       else
9190         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
9191     }
9192
9193     Ops.push_back(SetCC->getOperand(2));
9194     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
9195   }
9196 }
9197
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Given a SIGN_EXTEND or ZERO_EXTEND node \p N whose operand is a vector
/// load, try to replace the extend with a CONCAT_VECTORS of several narrower
/// extending loads. On success N and the original load are replaced via
/// CombineTo and SDValue(N, 0) is returned; otherwise an empty SDValue is
/// returned and the DAG is left unchanged.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Require a plain (non-extending, unindexed, non-volatile) load with a
  // single use, a power-of-2 vector result type, and the target's approval.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collects the setcc users of the load that must be rewritten to use the
  // extended value.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(DstVT, N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Source and destination types are split in lock-step; the loop stops once
  // the extending load is legal/custom or the source is down to one element.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  // The loop may have stopped only because the source ran out of elements.
  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  // Number of narrow loads needed to cover the whole destination vector.
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Distance between consecutive narrow loads: the store size of one split
  // source vector.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    // Alignment of this piece, derived from the original load's alignment and
    // the piece's offset.
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    // Every split load uses the original load's input chain.
    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, SDLoc(LN0), SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT, Align,
        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());

    // Advance the address for the next piece.
    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Join the output chains of the pieces and concatenate their values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  // Simplify TF.
  AddToWorklist(NewChain.getNode());

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  ExtendSetCCUses(SetCCs, N0, NewValue, (ISD::NodeType)N->getOpcode());
  // The load's value becomes the truncate and its chain becomes NewChain.
  CombineTo(N0.getNode(), Trunc, NewChain);
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9294
// fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
//      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
//
// N must be a ZERO_EXTEND node. On success N is replaced via CombineTo and
// SDValue(N, 0) is returned; otherwise an empty SDValue is returned.
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
  assert(N->getOpcode() == ISD::ZERO_EXTEND);
  EVT VT = N->getValueType(0);
  EVT OrigVT = N->getOperand(0).getValueType();
  // Nothing to do when the target reports the zext as free.
  if (TLI.isZExtFree(OrigVT, VT))
    return SDValue();

  // and/or/xor
  // The logic op must have a constant RHS and, once operations have to be
  // legal, be legal at the wide type.
  SDValue N0 = N->getOperand(0);
  if (!(N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
        N0.getOpcode() == ISD::XOR) ||
      N0.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N0.getOpcode(), VT)))
    return SDValue();

  // shl/shr
  // Same requirements for the shift feeding the logic op. Only SHL and SRL
  // are accepted.
  SDValue N1 = N0->getOperand(0);
  if (!(N1.getOpcode() == ISD::SHL || N1.getOpcode() == ISD::SRL) ||
      N1.getOperand(1).getOpcode() != ISD::Constant ||
      (LegalOperations && !TLI.isOperationLegal(N1.getOpcode(), VT)))
    return SDValue();

  // load
  // The shifted value must be an unindexed load that is not already
  // sign-extending, and a zextload from its memory type to VT must be legal.
  if (!isa<LoadSDNode>(N1.getOperand(0)))
    return SDValue();
  LoadSDNode *Load = cast<LoadSDNode>(N1.getOperand(0));
  EVT MemVT = Load->getMemoryVT();
  if (!TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) ||
      Load->getExtensionType() == ISD::SEXTLOAD || Load->isIndexed())
    return SDValue();


  // If the shift op is SHL, the logic op must be AND, otherwise the result
  // will be wrong.
  if (N1.getOpcode() == ISD::SHL && N0.getOpcode() != ISD::AND)
    return SDValue();

  // Both intermediate nodes are rebuilt at the wide type, so they must not
  // have other users.
  if (!N0.hasOneUse() || !N1.hasOneUse())
    return SDValue();

  // Check the load's other users; setcc users that need rewriting are
  // collected in SetCCs.
  SmallVector<SDNode*, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(VT, N1.getNode(), N1.getOperand(0),
                               ISD::ZERO_EXTEND, SetCCs, TLI))
    return SDValue();

  // Actually do the transformation.
  SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(Load), VT,
                                   Load->getChain(), Load->getBasePtr(),
                                   Load->getMemoryVT(), Load->getMemOperand());

  // Redo the shift at the wide type, reusing the original shift-amount
  // operand.
  SDLoc DL1(N1);
  SDValue Shift = DAG.getNode(N1.getOpcode(), DL1, VT, ExtLoad,
                              N1.getOperand(1));

  // Zero-extend the logic op's constant to the wide type. Note that despite
  // its name, `And` is built with N0's opcode and so may be an AND, OR or XOR.
  APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
  Mask = Mask.zext(VT.getSizeInBits());
  SDLoc DL0(N0);
  SDValue And = DAG.getNode(N0.getOpcode(), DL0, VT, Shift,
                            DAG.getConstant(Mask, DL0, VT));

  ExtendSetCCUses(SetCCs, N1.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
  CombineTo(N, And);
  // If the old load's value has a single use left, only its chain result is
  // redirected to the new load. Otherwise the value is replaced by a truncate
  // of the extended load as well.
  if (SDValue(Load, 0).hasOneUse()) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), ExtLoad.getValue(1));
  } else {
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(Load),
                                Load->getValueType(0), ExtLoad);
    CombineTo(Load, Trunc, ExtLoad.getValue(1));
  }

  // N0 is dead at this point.
  recursivelyDeleteUnusedNodes(N0.getNode());

  return SDValue(N,0); // Return N so it doesn't get rechecked!
}
9372
9373 /// If we're narrowing or widening the result of a vector select and the final
9374 /// size is the same size as a setcc (compare) feeding the select, then try to
9375 /// apply the cast operation to the select's operands because matching vector
9376 /// sizes for a select condition and other operands should be more efficient.
9377 SDValue DAGCombiner::matchVSelectOpSizesWithSetCC(SDNode *Cast) {
9378   unsigned CastOpcode = Cast->getOpcode();
9379   assert((CastOpcode == ISD::SIGN_EXTEND || CastOpcode == ISD::ZERO_EXTEND ||
9380           CastOpcode == ISD::TRUNCATE || CastOpcode == ISD::FP_EXTEND ||
9381           CastOpcode == ISD::FP_ROUND) &&
9382          "Unexpected opcode for vector select narrowing/widening");
9383
9384   // We only do this transform before legal ops because the pattern may be
9385   // obfuscated by target-specific operations after legalization. Do not create
9386   // an illegal select op, however, because that may be difficult to lower.
9387   EVT VT = Cast->getValueType(0);
9388   if (LegalOperations || !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
9389     return SDValue();
9390
9391   SDValue VSel = Cast->getOperand(0);
9392   if (VSel.getOpcode() != ISD::VSELECT || !VSel.hasOneUse() ||
9393       VSel.getOperand(0).getOpcode() != ISD::SETCC)
9394     return SDValue();
9395
9396   // Does the setcc have the same vector size as the casted select?
9397   SDValue SetCC = VSel.getOperand(0);
9398   EVT SetCCVT = getSetCCResultType(SetCC.getOperand(0).getValueType());
9399   if (SetCCVT.getSizeInBits() != VT.getSizeInBits())
9400     return SDValue();
9401
9402   // cast (vsel (setcc X), A, B) --> vsel (setcc X), (cast A), (cast B)
9403   SDValue A = VSel.getOperand(1);
9404   SDValue B = VSel.getOperand(2);
9405   SDValue CastA, CastB;
9406   SDLoc DL(Cast);
9407   if (CastOpcode == ISD::FP_ROUND) {
9408     // FP_ROUND (fptrunc) has an extra flag operand to pass along.
9409     CastA = DAG.getNode(CastOpcode, DL, VT, A, Cast->getOperand(1));
9410     CastB = DAG.getNode(CastOpcode, DL, VT, B, Cast->getOperand(1));
9411   } else {
9412     CastA = DAG.getNode(CastOpcode, DL, VT, A);
9413     CastB = DAG.getNode(CastOpcode, DL, VT, B);
9414   }
9415   return DAG.getNode(ISD::VSELECT, DL, VT, SetCC, CastA, CastB);
9416 }
9417
9418 // fold ([s|z]ext ([s|z]extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9419 // fold ([s|z]ext (     extload x)) -> ([s|z]ext (truncate ([s|z]extload x)))
9420 static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
9421                                      const TargetLowering &TLI, EVT VT,
9422                                      bool LegalOperations, SDNode *N,
9423                                      SDValue N0, ISD::LoadExtType ExtLoadType) {
9424   SDNode *N0Node = N0.getNode();
9425   bool isAExtLoad = (ExtLoadType == ISD::SEXTLOAD) ? ISD::isSEXTLoad(N0Node)
9426                                                    : ISD::isZEXTLoad(N0Node);
9427   if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
9428       !ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
9429     return SDValue();
9430
9431   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
9432   EVT MemVT = LN0->getMemoryVT();
9433   if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
9434       !TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
9435     return SDValue();
9436
9437   SDValue ExtLoad =
9438       DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
9439                      LN0->getBasePtr(), MemVT, LN0->getMemOperand());
9440   Combiner.CombineTo(N, ExtLoad);
9441   DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
9442   if (LN0->use_empty())
9443     Combiner.recursivelyDeleteUnusedNodes(LN0);
9444   return SDValue(N, 0); // Return N so it doesn't get rechecked!
9445 }
9446
// fold ([s|z]ext (load x)) -> ([s|z]ext (truncate ([s|z]extload x)))
// Only generate vector extloads when 1) they're legal, and 2) they are
// deemed desirable by the target.
//
// N is the extend node, N0 its operand, VT the extend's result type,
// ExtLoadType the kind of extending load to create and ExtOpc the matching
// extend opcode (used when rewriting setcc users of the load). On success N
// is replaced by the new load and SDValue(N, 0) is returned; otherwise an
// empty SDValue is returned.
static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
                                  const TargetLowering &TLI, EVT VT,
                                  bool LegalOperations, SDNode *N, SDValue N0,
                                  ISD::LoadExtType ExtLoadType,
                                  ISD::NodeType ExtOpc) {
  // N0 must be a non-extending, unindexed load. After operation legalization,
  // for vectors, and for volatile loads, the extending load must also be legal.
  // The cast<LoadSDNode> below is only evaluated once isNON_EXTLoad has
  // succeeded, thanks to short-circuit evaluation.
  if (!ISD::isNON_EXTLoad(N0.getNode()) ||
      !ISD::isUNINDEXEDLoad(N0.getNode()) ||
      ((LegalOperations || VT.isVector() ||
        cast<LoadSDNode>(N0)->isVolatile()) &&
       !TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
    return {};

  // With a single use (N itself) there are no other users to worry about;
  // otherwise ask ExtendUsesToFormExtLoad, which also collects the setcc users
  // that need rewriting.
  bool DoXform = true;
  SmallVector<SDNode *, 4> SetCCs;
  if (!N0.hasOneUse())
    DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ExtOpc, SetCCs, TLI);
  // Vector extloads additionally need the target's approval.
  if (VT.isVector())
    DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
  if (!DoXform)
    return {};

  // Build the extending load with the original load's chain, address and
  // memory operand; the memory type is the original load's value type.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  SDValue ExtLoad = DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
                                   LN0->getBasePtr(), N0.getValueType(),
                                   LN0->getMemOperand());
  Combiner.ExtendSetCCUses(SetCCs, N0, ExtLoad, ExtOpc);
  // If the load value is used only by N, replace it via CombineTo N.
  // This has to be sampled before the CombineTo below changes N's uses.
  bool NoReplaceTrunc = SDValue(LN0, 0).hasOneUse();
  Combiner.CombineTo(N, ExtLoad);
  if (NoReplaceTrunc) {
    // Only the chain result still needs redirecting; afterwards the old load
    // can be cleaned up.
    DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
    Combiner.recursivelyDeleteUnusedNodes(LN0);
  } else {
    // Remaining users of the narrow value get a truncate of the wide load.
    SDValue Trunc =
        DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
    Combiner.CombineTo(LN0, Trunc, ExtLoad.getValue(1));
  }
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
9489
9490 static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
9491                                        bool LegalOperations) {
9492   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
9493           N->getOpcode() == ISD::ZERO_EXTEND) && "Expected sext or zext");
9494
9495   SDValue SetCC = N->getOperand(0);
9496   if (LegalOperations || SetCC.getOpcode() != ISD::SETCC ||
9497       !SetCC.hasOneUse() || SetCC.getValueType() != MVT::i1)
9498     return SDValue();
9499
9500   SDValue X = SetCC.getOperand(0);
9501   SDValue Ones = SetCC.getOperand(1);
9502   ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
9503   EVT VT = N->getValueType(0);
9504   EVT XVT = X.getValueType();
9505   // setge X, C is canonicalized to setgt, so we do not need to match that
9506   // pattern. The setlt sibling is folded in SimplifySelectCC() because it does
9507   // not require the 'not' op.
9508   if (CC == ISD::SETGT && isAllOnesConstant(Ones) && VT == XVT) {
9509     // Invert and smear/shift the sign bit:
9510     // sext i1 (setgt iN X, -1) --> sra (not X), (N - 1)
9511     // zext i1 (setgt iN X, -1) --> srl (not X), (N - 1)
9512     SDLoc DL(N);
9513     SDValue NotX = DAG.getNOT(DL, X, VT);
9514     SDValue ShiftAmount = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
9515     auto ShiftOpcode = N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SRA : ISD::SRL;
9516     return DAG.getNode(ShiftOpcode, DL, VT, NotX, ShiftAmount);
9517   }
9518   return SDValue();
9519 }
9520
9521 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
9522   SDValue N0 = N->getOperand(0);
9523   EVT VT = N->getValueType(0);
9524   SDLoc DL(N);
9525
9526   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9527     return Res;
9528
9529   // fold (sext (sext x)) -> (sext x)
9530   // fold (sext (aext x)) -> (sext x)
9531   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9532     return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N0.getOperand(0));
9533
9534   if (N0.getOpcode() == ISD::TRUNCATE) {
9535     // fold (sext (truncate (load x))) -> (sext (smaller load x))
9536     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
9537     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9538       SDNode *oye = N0.getOperand(0).getNode();
9539       if (NarrowLoad.getNode() != N0.getNode()) {
9540         CombineTo(N0.getNode(), NarrowLoad);
9541         // CombineTo deleted the truncate, if needed, but not what's under it.
9542         AddToWorklist(oye);
9543       }
9544       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
9545     }
9546
9547     // See if the value being truncated is already sign extended.  If so, just
9548     // eliminate the trunc/sext pair.
9549     SDValue Op = N0.getOperand(0);
9550     unsigned OpBits   = Op.getScalarValueSizeInBits();
9551     unsigned MidBits  = N0.getScalarValueSizeInBits();
9552     unsigned DestBits = VT.getScalarSizeInBits();
9553     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
9554
9555     if (OpBits == DestBits) {
9556       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
9557       // bits, it is already ready.
9558       if (NumSignBits > DestBits-MidBits)
9559         return Op;
9560     } else if (OpBits < DestBits) {
9561       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
9562       // bits, just sext from i32.
9563       if (NumSignBits > OpBits-MidBits)
9564         return DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op);
9565     } else {
9566       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
9567       // bits, just truncate to i32.
9568       if (NumSignBits > OpBits-MidBits)
9569         return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
9570     }
9571
9572     // fold (sext (truncate x)) -> (sextinreg x).
9573     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
9574                                                  N0.getValueType())) {
9575       if (OpBits < DestBits)
9576         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
9577       else if (OpBits > DestBits)
9578         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
9579       return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Op,
9580                          DAG.getValueType(N0.getValueType()));
9581     }
9582   }
9583
9584   // Try to simplify (sext (load x)).
9585   if (SDValue foldedExt =
9586           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9587                              ISD::SEXTLOAD, ISD::SIGN_EXTEND))
9588     return foldedExt;
9589
9590   // fold (sext (load x)) to multiple smaller sextloads.
9591   // Only on illegal but splittable vectors.
9592   if (SDValue ExtLoad = CombineExtLoad(N))
9593     return ExtLoad;
9594
9595   // Try to simplify (sext (sextload x)).
9596   if (SDValue foldedExt = tryToFoldExtOfExtload(
9597           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::SEXTLOAD))
9598     return foldedExt;
9599
9600   // fold (sext (and/or/xor (load x), cst)) ->
9601   //      (and/or/xor (sextload x), (sext cst))
9602   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9603        N0.getOpcode() == ISD::XOR) &&
9604       isa<LoadSDNode>(N0.getOperand(0)) &&
9605       N0.getOperand(1).getOpcode() == ISD::Constant &&
9606       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9607     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9608     EVT MemVT = LN00->getMemoryVT();
9609     if (TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT) &&
9610       LN00->getExtensionType() != ISD::ZEXTLOAD && LN00->isUnindexed()) {
9611       SmallVector<SDNode*, 4> SetCCs;
9612       bool DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9613                                              ISD::SIGN_EXTEND, SetCCs, TLI);
9614       if (DoXform) {
9615         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN00), VT,
9616                                          LN00->getChain(), LN00->getBasePtr(),
9617                                          LN00->getMemoryVT(),
9618                                          LN00->getMemOperand());
9619         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9620         Mask = Mask.sext(VT.getSizeInBits());
9621         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9622                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9623         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::SIGN_EXTEND);
9624         bool NoReplaceTruncAnd = !N0.hasOneUse();
9625         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9626         CombineTo(N, And);
9627         // If N0 has multiple uses, change other uses as well.
9628         if (NoReplaceTruncAnd) {
9629           SDValue TruncAnd =
9630               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9631           CombineTo(N0.getNode(), TruncAnd);
9632         }
9633         if (NoReplaceTrunc) {
9634           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9635         } else {
9636           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9637                                       LN00->getValueType(0), ExtLoad);
9638           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9639         }
9640         return SDValue(N,0); // Return N so it doesn't get rechecked!
9641       }
9642     }
9643   }
9644
9645   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9646     return V;
9647
9648   if (N0.getOpcode() == ISD::SETCC) {
9649     SDValue N00 = N0.getOperand(0);
9650     SDValue N01 = N0.getOperand(1);
9651     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
9652     EVT N00VT = N0.getOperand(0).getValueType();
9653
9654     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
9655     // Only do this before legalize for now.
9656     if (VT.isVector() && !LegalOperations &&
9657         TLI.getBooleanContents(N00VT) ==
9658             TargetLowering::ZeroOrNegativeOneBooleanContent) {
9659       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
9660       // of the same size as the compared operands. Only optimize sext(setcc())
9661       // if this is the case.
9662       EVT SVT = getSetCCResultType(N00VT);
9663
9664       // If we already have the desired type, don't change it.
9665       if (SVT != N0.getValueType()) {
9666         // We know that the # elements of the results is the same as the
9667         // # elements of the compare (and the # elements of the compare result
9668         // for that matter).  Check to see that they are the same size.  If so,
9669         // we know that the element size of the sext'd result matches the
9670         // element size of the compare operands.
9671         if (VT.getSizeInBits() == SVT.getSizeInBits())
9672           return DAG.getSetCC(DL, VT, N00, N01, CC);
9673
9674         // If the desired elements are smaller or larger than the source
9675         // elements, we can use a matching integer vector type and then
9676         // truncate/sign extend.
9677         EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
9678         if (SVT == MatchingVecType) {
9679           SDValue VsetCC = DAG.getSetCC(DL, MatchingVecType, N00, N01, CC);
9680           return DAG.getSExtOrTrunc(VsetCC, DL, VT);
9681         }
9682       }
9683     }
9684
9685     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
9686     // Here, T can be 1 or -1, depending on the type of the setcc and
9687     // getBooleanContents().
9688     unsigned SetCCWidth = N0.getScalarValueSizeInBits();
9689
9690     // To determine the "true" side of the select, we need to know the high bit
9691     // of the value returned by the setcc if it evaluates to true.
9692     // If the type of the setcc is i1, then the true case of the select is just
9693     // sext(i1 1), that is, -1.
9694     // If the type of the setcc is larger (say, i8) then the value of the high
9695     // bit depends on getBooleanContents(), so ask TLI for a real "true" value
9696     // of the appropriate width.
9697     SDValue ExtTrueVal = (SetCCWidth == 1)
9698                              ? DAG.getAllOnesConstant(DL, VT)
9699                              : DAG.getBoolConstant(true, DL, VT, N00VT);
9700     SDValue Zero = DAG.getConstant(0, DL, VT);
9701     if (SDValue SCC =
9702             SimplifySelectCC(DL, N00, N01, ExtTrueVal, Zero, CC, true))
9703       return SCC;
9704
9705     if (!VT.isVector() && !TLI.convertSelectOfConstantsToMath(VT)) {
9706       EVT SetCCVT = getSetCCResultType(N00VT);
9707       // Don't do this transform for i1 because there's a select transform
9708       // that would reverse it.
9709       // TODO: We should not do this transform at all without a target hook
9710       // because a sext is likely cheaper than a select?
9711       if (SetCCVT.getScalarSizeInBits() != 1 &&
9712           (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, N00VT))) {
9713         SDValue SetCC = DAG.getSetCC(DL, SetCCVT, N00, N01, CC);
9714         return DAG.getSelect(DL, VT, SetCC, ExtTrueVal, Zero);
9715       }
9716     }
9717   }
9718
9719   // fold (sext x) -> (zext x) if the sign bit is known zero.
9720   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
9721       DAG.SignBitIsZero(N0))
9722     return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0);
9723
9724   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
9725     return NewVSel;
9726
9727   // Eliminate this sign extend by doing a negation in the destination type:
9728   // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
9729   if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
9730       isNullOrNullSplat(N0.getOperand(0)) &&
9731       N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
9732       TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
9733     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
9734     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
9735   }
9736   // Eliminate this sign extend by doing a decrement in the destination type:
9737   // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
9738   if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
9739       isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
9740       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
9741       TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
9742     SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
9743     return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
9744   }
9745
9746   return SDValue();
9747 }
9748
9749 // isTruncateOf - If N is a truncate of some other value, return true, record
9750 // the value being truncated in Op and which of Op's bits are zero/one in Known.
9751 // This function computes KnownBits to avoid a duplicated call to
9752 // computeKnownBits in the caller.
9753 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
9754                          KnownBits &Known) {
9755   if (N->getOpcode() == ISD::TRUNCATE) {
9756     Op = N->getOperand(0);
9757     Known = DAG.computeKnownBits(Op);
9758     return true;
9759   }
9760
9761   if (N.getOpcode() != ISD::SETCC ||
9762       N.getValueType().getScalarType() != MVT::i1 ||
9763       cast<CondCodeSDNode>(N.getOperand(2))->get() != ISD::SETNE)
9764     return false;
9765
9766   SDValue Op0 = N->getOperand(0);
9767   SDValue Op1 = N->getOperand(1);
9768   assert(Op0.getValueType() == Op1.getValueType());
9769
9770   if (isNullOrNullSplat(Op0))
9771     Op = Op1;
9772   else if (isNullOrNullSplat(Op1))
9773     Op = Op0;
9774   else
9775     return false;
9776
9777   Known = DAG.computeKnownBits(Op);
9778
9779   return (Known.Zero | 1).isAllOnesValue();
9780 }
9781
9782 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
9783   SDValue N0 = N->getOperand(0);
9784   EVT VT = N->getValueType(0);
9785
9786   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
9787     return Res;
9788
9789   // fold (zext (zext x)) -> (zext x)
9790   // fold (zext (aext x)) -> (zext x)
9791   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
9792     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
9793                        N0.getOperand(0));
9794
9795   // fold (zext (truncate x)) -> (zext x) or
9796   //      (zext (truncate x)) -> (truncate x)
9797   // This is valid when the truncated bits of x are already zero.
9798   SDValue Op;
9799   KnownBits Known;
9800   if (isTruncateOf(DAG, N0, Op, Known)) {
9801     APInt TruncatedBits =
9802       (Op.getScalarValueSizeInBits() == N0.getScalarValueSizeInBits()) ?
9803       APInt(Op.getScalarValueSizeInBits(), 0) :
9804       APInt::getBitsSet(Op.getScalarValueSizeInBits(),
9805                         N0.getScalarValueSizeInBits(),
9806                         std::min(Op.getScalarValueSizeInBits(),
9807                                  VT.getScalarSizeInBits()));
9808     if (TruncatedBits.isSubsetOf(Known.Zero))
9809       return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9810   }
9811
9812   // fold (zext (truncate x)) -> (and x, mask)
9813   if (N0.getOpcode() == ISD::TRUNCATE) {
9814     // fold (zext (truncate (load x))) -> (zext (smaller load x))
9815     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
9816     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
9817       SDNode *oye = N0.getOperand(0).getNode();
9818       if (NarrowLoad.getNode() != N0.getNode()) {
9819         CombineTo(N0.getNode(), NarrowLoad);
9820         // CombineTo deleted the truncate, if needed, but not what's under it.
9821         AddToWorklist(oye);
9822       }
9823       return SDValue(N, 0); // Return N so it doesn't get rechecked!
9824     }
9825
9826     EVT SrcVT = N0.getOperand(0).getValueType();
9827     EVT MinVT = N0.getValueType();
9828
9829     // Try to mask before the extension to avoid having to generate a larger mask,
9830     // possibly over several sub-vectors.
9831     if (SrcVT.bitsLT(VT) && VT.isVector()) {
9832       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
9833                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
9834         SDValue Op = N0.getOperand(0);
9835         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9836         AddToWorklist(Op.getNode());
9837         SDValue ZExtOrTrunc = DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
9838         // Transfer the debug info; the new node is equivalent to N0.
9839         DAG.transferDbgValues(N0, ZExtOrTrunc);
9840         return ZExtOrTrunc;
9841       }
9842     }
9843
9844     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
9845       SDValue Op = DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
9846       AddToWorklist(Op.getNode());
9847       SDValue And = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
9848       // We may safely transfer the debug info describing the truncate node over
9849       // to the equivalent and operation.
9850       DAG.transferDbgValues(N0, And);
9851       return And;
9852     }
9853   }
9854
9855   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
9856   // if either of the casts is not free.
9857   if (N0.getOpcode() == ISD::AND &&
9858       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
9859       N0.getOperand(1).getOpcode() == ISD::Constant &&
9860       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
9861                            N0.getValueType()) ||
9862        !TLI.isZExtFree(N0.getValueType(), VT))) {
9863     SDValue X = N0.getOperand(0).getOperand(0);
9864     X = DAG.getAnyExtOrTrunc(X, SDLoc(X), VT);
9865     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9866     Mask = Mask.zext(VT.getSizeInBits());
9867     SDLoc DL(N);
9868     return DAG.getNode(ISD::AND, DL, VT,
9869                        X, DAG.getConstant(Mask, DL, VT));
9870   }
9871
9872   // Try to simplify (zext (load x)).
9873   if (SDValue foldedExt =
9874           tryToFoldExtOfLoad(DAG, *this, TLI, VT, LegalOperations, N, N0,
9875                              ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
9876     return foldedExt;
9877
9878   // fold (zext (load x)) to multiple smaller zextloads.
9879   // Only on illegal but splittable vectors.
9880   if (SDValue ExtLoad = CombineExtLoad(N))
9881     return ExtLoad;
9882
9883   // fold (zext (and/or/xor (load x), cst)) ->
9884   //      (and/or/xor (zextload x), (zext cst))
9885   // Unless (and (load x) cst) will match as a zextload already and has
9886   // additional users.
9887   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
9888        N0.getOpcode() == ISD::XOR) &&
9889       isa<LoadSDNode>(N0.getOperand(0)) &&
9890       N0.getOperand(1).getOpcode() == ISD::Constant &&
9891       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
9892     LoadSDNode *LN00 = cast<LoadSDNode>(N0.getOperand(0));
9893     EVT MemVT = LN00->getMemoryVT();
9894     if (TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT) &&
9895         LN00->getExtensionType() != ISD::SEXTLOAD && LN00->isUnindexed()) {
9896       bool DoXform = true;
9897       SmallVector<SDNode*, 4> SetCCs;
9898       if (!N0.hasOneUse()) {
9899         if (N0.getOpcode() == ISD::AND) {
9900           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
9901           EVT LoadResultTy = AndC->getValueType(0);
9902           EVT ExtVT;
9903           if (isAndLoadExtLoad(AndC, LN00, LoadResultTy, ExtVT))
9904             DoXform = false;
9905         }
9906       }
9907       if (DoXform)
9908         DoXform = ExtendUsesToFormExtLoad(VT, N0.getNode(), N0.getOperand(0),
9909                                           ISD::ZERO_EXTEND, SetCCs, TLI);
9910       if (DoXform) {
9911         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN00), VT,
9912                                          LN00->getChain(), LN00->getBasePtr(),
9913                                          LN00->getMemoryVT(),
9914                                          LN00->getMemOperand());
9915         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
9916         Mask = Mask.zext(VT.getSizeInBits());
9917         SDLoc DL(N);
9918         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
9919                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
9920         ExtendSetCCUses(SetCCs, N0.getOperand(0), ExtLoad, ISD::ZERO_EXTEND);
9921         bool NoReplaceTruncAnd = !N0.hasOneUse();
9922         bool NoReplaceTrunc = SDValue(LN00, 0).hasOneUse();
9923         CombineTo(N, And);
9924         // If N0 has multiple uses, change other uses as well.
9925         if (NoReplaceTruncAnd) {
9926           SDValue TruncAnd =
9927               DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(), And);
9928           CombineTo(N0.getNode(), TruncAnd);
9929         }
9930         if (NoReplaceTrunc) {
9931           DAG.ReplaceAllUsesOfValueWith(SDValue(LN00, 1), ExtLoad.getValue(1));
9932         } else {
9933           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(LN00),
9934                                       LN00->getValueType(0), ExtLoad);
9935           CombineTo(LN00, Trunc, ExtLoad.getValue(1));
9936         }
9937         return SDValue(N,0); // Return N so it doesn't get rechecked!
9938       }
9939     }
9940   }
9941
9942   // fold (zext (and/or/xor (shl/shr (load x), cst), cst)) ->
9943   //      (and/or/xor (shl/shr (zextload x), (zext cst)), (zext cst))
9944   if (SDValue ZExtLoad = CombineZExtLogicopShiftLoad(N))
9945     return ZExtLoad;
9946
9947   // Try to simplify (zext (zextload x)).
9948   if (SDValue foldedExt = tryToFoldExtOfExtload(
9949           DAG, *this, TLI, VT, LegalOperations, N, N0, ISD::ZEXTLOAD))
9950     return foldedExt;
9951
9952   if (SDValue V = foldExtendedSignBitTest(N, DAG, LegalOperations))
9953     return V;
9954
9955   if (N0.getOpcode() == ISD::SETCC) {
9956     // Only do this before legalize for now.
9957     if (!LegalOperations && VT.isVector() &&
9958         N0.getValueType().getVectorElementType() == MVT::i1) {
9959       EVT N00VT = N0.getOperand(0).getValueType();
9960       if (getSetCCResultType(N00VT) == N0.getValueType())
9961         return SDValue();
9962
9963       // We know that the # elements of the results is the same as the #
9964       // elements of the compare (and the # elements of the compare result for
9965       // that matter). Check to see that they are the same size. If so, we know
9966       // that the element size of the sext'd result matches the element size of
9967       // the compare operands.
9968       SDLoc DL(N);
9969       SDValue VecOnes = DAG.getConstant(1, DL, VT);
9970       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
9971         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
9972         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
9973                                      N0.getOperand(1), N0.getOperand(2));
9974         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
9975       }
9976
9977       // If the desired elements are smaller or larger than the source
9978       // elements we can use a matching integer vector type and then
9979       // truncate/sign extend.
9980       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
9981       SDValue VsetCC =
9982           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
9983                       N0.getOperand(1), N0.getOperand(2));
9984       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
9985                          VecOnes);
9986     }
9987
9988     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
9989     SDLoc DL(N);
9990     if (SDValue SCC = SimplifySelectCC(
9991             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
9992             DAG.getConstant(0, DL, VT),
9993             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
9994       return SCC;
9995   }
9996
9997   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
9998   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
9999       isa<ConstantSDNode>(N0.getOperand(1)) &&
10000       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
10001       N0.hasOneUse()) {
10002     SDValue ShAmt = N0.getOperand(1);
10003     if (N0.getOpcode() == ISD::SHL) {
10004       SDValue InnerZExt = N0.getOperand(0);
10005       // If the original shl may be shifting out bits, do not perform this
10006       // transformation.
10007       unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
10008         InnerZExt.getOperand(0).getValueSizeInBits();
10009       if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
10010         return SDValue();
10011     }
10012
10013     SDLoc DL(N);
10014
10015     // Ensure that the shift amount is wide enough for the shifted value.
10016     if (VT.getSizeInBits() >= 256)
10017       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
10018
10019     return DAG.getNode(N0.getOpcode(), DL, VT,
10020                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
10021                        ShAmt);
10022   }
10023
10024   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10025     return NewVSel;
10026
10027   return SDValue();
10028 }
10029
10030 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
10031   SDValue N0 = N->getOperand(0);
10032   EVT VT = N->getValueType(0);
10033
10034   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10035     return Res;
10036
10037   // fold (aext (aext x)) -> (aext x)
10038   // fold (aext (zext x)) -> (zext x)
10039   // fold (aext (sext x)) -> (sext x)
10040   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
10041       N0.getOpcode() == ISD::ZERO_EXTEND ||
10042       N0.getOpcode() == ISD::SIGN_EXTEND)
10043     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10044
10045   // fold (aext (truncate (load x))) -> (aext (smaller load x))
10046   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
10047   if (N0.getOpcode() == ISD::TRUNCATE) {
10048     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
10049       SDNode *oye = N0.getOperand(0).getNode();
10050       if (NarrowLoad.getNode() != N0.getNode()) {
10051         CombineTo(N0.getNode(), NarrowLoad);
10052         // CombineTo deleted the truncate, if needed, but not what's under it.
10053         AddToWorklist(oye);
10054       }
10055       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10056     }
10057   }
10058
10059   // fold (aext (truncate x))
10060   if (N0.getOpcode() == ISD::TRUNCATE)
10061     return DAG.getAnyExtOrTrunc(N0.getOperand(0), SDLoc(N), VT);
10062
10063   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
10064   // if the trunc is not free.
10065   if (N0.getOpcode() == ISD::AND &&
10066       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
10067       N0.getOperand(1).getOpcode() == ISD::Constant &&
10068       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
10069                           N0.getValueType())) {
10070     SDLoc DL(N);
10071     SDValue X = N0.getOperand(0).getOperand(0);
10072     X = DAG.getAnyExtOrTrunc(X, DL, VT);
10073     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
10074     Mask = Mask.zext(VT.getSizeInBits());
10075     return DAG.getNode(ISD::AND, DL, VT,
10076                        X, DAG.getConstant(Mask, DL, VT));
10077   }
10078
10079   // fold (aext (load x)) -> (aext (truncate (extload x)))
10080   // None of the supported targets knows how to perform load and any_ext
10081   // on vectors in one instruction.  We only perform this transformation on
10082   // scalars.
10083   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
10084       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10085       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
10086     bool DoXform = true;
10087     SmallVector<SDNode*, 4> SetCCs;
10088     if (!N0.hasOneUse())
10089       DoXform = ExtendUsesToFormExtLoad(VT, N, N0, ISD::ANY_EXTEND, SetCCs,
10090                                         TLI);
10091     if (DoXform) {
10092       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10093       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
10094                                        LN0->getChain(),
10095                                        LN0->getBasePtr(), N0.getValueType(),
10096                                        LN0->getMemOperand());
10097       ExtendSetCCUses(SetCCs, N0, ExtLoad, ISD::ANY_EXTEND);
10098       // If the load value is used only by N, replace it via CombineTo N.
10099       bool NoReplaceTrunc = N0.hasOneUse();
10100       CombineTo(N, ExtLoad);
10101       if (NoReplaceTrunc) {
10102         DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10103         recursivelyDeleteUnusedNodes(LN0);
10104       } else {
10105         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
10106                                     N0.getValueType(), ExtLoad);
10107         CombineTo(LN0, Trunc, ExtLoad.getValue(1));
10108       }
10109       return SDValue(N, 0); // Return N so it doesn't get rechecked!
10110     }
10111   }
10112
10113   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
10114   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
10115   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
10116   if (N0.getOpcode() == ISD::LOAD && !ISD::isNON_EXTLoad(N0.getNode()) &&
10117       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
10118     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10119     ISD::LoadExtType ExtType = LN0->getExtensionType();
10120     EVT MemVT = LN0->getMemoryVT();
10121     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
10122       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
10123                                        VT, LN0->getChain(), LN0->getBasePtr(),
10124                                        MemVT, LN0->getMemOperand());
10125       CombineTo(N, ExtLoad);
10126       DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
10127       recursivelyDeleteUnusedNodes(LN0);
10128       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10129     }
10130   }
10131
10132   if (N0.getOpcode() == ISD::SETCC) {
10133     // For vectors:
10134     // aext(setcc) -> vsetcc
10135     // aext(setcc) -> truncate(vsetcc)
10136     // aext(setcc) -> aext(vsetcc)
10137     // Only do this before legalize for now.
10138     if (VT.isVector() && !LegalOperations) {
10139       EVT N00VT = N0.getOperand(0).getValueType();
10140       if (getSetCCResultType(N00VT) == N0.getValueType())
10141         return SDValue();
10142
10143       // We know that the # elements of the results is the same as the
10144       // # elements of the compare (and the # elements of the compare result
10145       // for that matter).  Check to see that they are the same size.  If so,
10146       // we know that the element size of the sext'd result matches the
10147       // element size of the compare operands.
10148       if (VT.getSizeInBits() == N00VT.getSizeInBits())
10149         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
10150                              N0.getOperand(1),
10151                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
10152
10153       // If the desired elements are smaller or larger than the source
10154       // elements we can use a matching integer vector type and then
10155       // truncate/any extend
10156       EVT MatchingVectorType = N00VT.changeVectorElementTypeToInteger();
10157       SDValue VsetCC =
10158         DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
10159                       N0.getOperand(1),
10160                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
10161       return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
10162     }
10163
10164     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
10165     SDLoc DL(N);
10166     if (SDValue SCC = SimplifySelectCC(
10167             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
10168             DAG.getConstant(0, DL, VT),
10169             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
10170       return SCC;
10171   }
10172
10173   return SDValue();
10174 }
10175
10176 SDValue DAGCombiner::visitAssertExt(SDNode *N) {
10177   unsigned Opcode = N->getOpcode();
10178   SDValue N0 = N->getOperand(0);
10179   SDValue N1 = N->getOperand(1);
10180   EVT AssertVT = cast<VTSDNode>(N1)->getVT();
10181
10182   // fold (assert?ext (assert?ext x, vt), vt) -> (assert?ext x, vt)
10183   if (N0.getOpcode() == Opcode &&
10184       AssertVT == cast<VTSDNode>(N0.getOperand(1))->getVT())
10185     return N0;
10186
10187   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10188       N0.getOperand(0).getOpcode() == Opcode) {
10189     // We have an assert, truncate, assert sandwich. Make one stronger assert
10190     // by asserting on the smallest asserted type to the larger source type.
10191     // This eliminates the later assert:
10192     // assert (trunc (assert X, i8) to iN), i1 --> trunc (assert X, i1) to iN
10193     // assert (trunc (assert X, i1) to iN), i8 --> trunc (assert X, i1) to iN
10194     SDValue BigA = N0.getOperand(0);
10195     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10196     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10197            "Asserting zero/sign-extended bits to a type larger than the "
10198            "truncated destination does not provide information");
10199
10200     SDLoc DL(N);
10201     EVT MinAssertVT = AssertVT.bitsLT(BigA_AssertVT) ? AssertVT : BigA_AssertVT;
10202     SDValue MinAssertVTVal = DAG.getValueType(MinAssertVT);
10203     SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10204                                     BigA.getOperand(0), MinAssertVTVal);
10205     return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10206   }
10207
10208   // If we have (AssertZext (truncate (AssertSext X, iX)), iY) and Y is smaller
10209   // than X. Just move the AssertZext in front of the truncate and drop the
10210   // AssertSExt.
10211   if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse() &&
10212       N0.getOperand(0).getOpcode() == ISD::AssertSext &&
10213       Opcode == ISD::AssertZext) {
10214     SDValue BigA = N0.getOperand(0);
10215     EVT BigA_AssertVT = cast<VTSDNode>(BigA.getOperand(1))->getVT();
10216     assert(BigA_AssertVT.bitsLE(N0.getValueType()) &&
10217            "Asserting zero/sign-extended bits to a type larger than the "
10218            "truncated destination does not provide information");
10219
10220     if (AssertVT.bitsLT(BigA_AssertVT)) {
10221       SDLoc DL(N);
10222       SDValue NewAssert = DAG.getNode(Opcode, DL, BigA.getValueType(),
10223                                       BigA.getOperand(0), N1);
10224       return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewAssert);
10225     }
10226   }
10227
10228   return SDValue();
10229 }
10230
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, and N is a multiple of the number of bits of the
/// narrower type, transform it into a narrower load from address + N / (number
/// of bits of the new type). Also narrow the load if the result is masked with
/// an AND to effectively produce a smaller type. If the result is to be
/// extended, also fold the extension to form an extending load.
10237 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
10238   unsigned Opc = N->getOpcode();
10239
10240   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
10241   SDValue N0 = N->getOperand(0);
10242   EVT VT = N->getValueType(0);
10243   EVT ExtVT = VT;
10244
10245   // This transformation isn't valid for vector loads.
10246   if (VT.isVector())
10247     return SDValue();
10248
10249   unsigned ShAmt = 0;
10250   bool HasShiftedOffset = false;
10251   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
10252   // extended to VT.
10253   if (Opc == ISD::SIGN_EXTEND_INREG) {
10254     ExtType = ISD::SEXTLOAD;
10255     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
10256   } else if (Opc == ISD::SRL) {
10257     // Another special-case: SRL is basically zero-extending a narrower value,
10258     // or it maybe shifting a higher subword, half or byte into the lowest
10259     // bits.
10260     ExtType = ISD::ZEXTLOAD;
10261     N0 = SDValue(N, 0);
10262
10263     auto *LN0 = dyn_cast<LoadSDNode>(N0.getOperand(0));
10264     auto *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
10265     if (!N01 || !LN0)
10266       return SDValue();
10267
10268     uint64_t ShiftAmt = N01->getZExtValue();
10269     uint64_t MemoryWidth = LN0->getMemoryVT().getSizeInBits();
10270     if (LN0->getExtensionType() != ISD::SEXTLOAD && MemoryWidth > ShiftAmt)
10271       ExtVT = EVT::getIntegerVT(*DAG.getContext(), MemoryWidth - ShiftAmt);
10272     else
10273       ExtVT = EVT::getIntegerVT(*DAG.getContext(),
10274                                 VT.getSizeInBits() - ShiftAmt);
10275   } else if (Opc == ISD::AND) {
10276     // An AND with a constant mask is the same as a truncate + zero-extend.
10277     auto AndC = dyn_cast<ConstantSDNode>(N->getOperand(1));
10278     if (!AndC)
10279       return SDValue();
10280
10281     const APInt &Mask = AndC->getAPIntValue();
10282     unsigned ActiveBits = 0;
10283     if (Mask.isMask()) {
10284       ActiveBits = Mask.countTrailingOnes();
10285     } else if (Mask.isShiftedMask()) {
10286       ShAmt = Mask.countTrailingZeros();
10287       APInt ShiftedMask = Mask.lshr(ShAmt);
10288       ActiveBits = ShiftedMask.countTrailingOnes();
10289       HasShiftedOffset = true;
10290     } else
10291       return SDValue();
10292
10293     ExtType = ISD::ZEXTLOAD;
10294     ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
10295   }
10296
10297   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
10298     SDValue SRL = N0;
10299     if (auto *ConstShift = dyn_cast<ConstantSDNode>(SRL.getOperand(1))) {
10300       ShAmt = ConstShift->getZExtValue();
10301       unsigned EVTBits = ExtVT.getSizeInBits();
10302       // Is the shift amount a multiple of size of VT?
10303       if ((ShAmt & (EVTBits-1)) == 0) {
10304         N0 = N0.getOperand(0);
10305         // Is the load width a multiple of size of VT?
10306         if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
10307           return SDValue();
10308       }
10309
10310       // At this point, we must have a load or else we can't do the transform.
10311       if (!isa<LoadSDNode>(N0)) return SDValue();
10312
10313       auto *LN0 = cast<LoadSDNode>(N0);
10314
10315       // Because a SRL must be assumed to *need* to zero-extend the high bits
10316       // (as opposed to anyext the high bits), we can't combine the zextload
10317       // lowering of SRL and an sextload.
10318       if (LN0->getExtensionType() == ISD::SEXTLOAD)
10319         return SDValue();
10320
10321       // If the shift amount is larger than the input type then we're not
10322       // accessing any of the loaded bytes.  If the load was a zextload/extload
10323       // then the result of the shift+trunc is zero/undef (handled elsewhere).
10324       if (ShAmt >= LN0->getMemoryVT().getSizeInBits())
10325         return SDValue();
10326
10327       // If the SRL is only used by a masking AND, we may be able to adjust
10328       // the ExtVT to make the AND redundant.
10329       SDNode *Mask = *(SRL->use_begin());
10330       if (Mask->getOpcode() == ISD::AND &&
10331           isa<ConstantSDNode>(Mask->getOperand(1))) {
10332         const APInt &ShiftMask =
10333           cast<ConstantSDNode>(Mask->getOperand(1))->getAPIntValue();
10334         if (ShiftMask.isMask()) {
10335           EVT MaskedVT = EVT::getIntegerVT(*DAG.getContext(),
10336                                            ShiftMask.countTrailingOnes());
10337           // If the mask is smaller, recompute the type.
10338           if ((ExtVT.getSizeInBits() > MaskedVT.getSizeInBits()) &&
10339               TLI.isLoadExtLegal(ExtType, N0.getValueType(), MaskedVT))
10340             ExtVT = MaskedVT;
10341         }
10342       }
10343     }
10344   }
10345
10346   // If the load is shifted left (and the result isn't shifted back right),
10347   // we can fold the truncate through the shift.
10348   unsigned ShLeftAmt = 0;
10349   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10350       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
10351     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
10352       ShLeftAmt = N01->getZExtValue();
10353       N0 = N0.getOperand(0);
10354     }
10355   }
10356
10357   // If we haven't found a load, we can't narrow it.
10358   if (!isa<LoadSDNode>(N0))
10359     return SDValue();
10360
10361   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10362   if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
10363     return SDValue();
10364
10365   auto AdjustBigEndianShift = [&](unsigned ShAmt) {
10366     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
10367     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
10368     return LVTStoreBits - EVTStoreBits - ShAmt;
10369   };
10370
10371   // For big endian targets, we need to adjust the offset to the pointer to
10372   // load the correct bytes.
10373   if (DAG.getDataLayout().isBigEndian())
10374     ShAmt = AdjustBigEndianShift(ShAmt);
10375
10376   EVT PtrType = N0.getOperand(1).getValueType();
10377   uint64_t PtrOff = ShAmt / 8;
10378   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
10379   SDLoc DL(LN0);
10380   // The original load itself didn't wrap, so an offset within it doesn't.
10381   SDNodeFlags Flags;
10382   Flags.setNoUnsignedWrap(true);
10383   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
10384                                PtrType, LN0->getBasePtr(),
10385                                DAG.getConstant(PtrOff, DL, PtrType),
10386                                Flags);
10387   AddToWorklist(NewPtr.getNode());
10388
10389   SDValue Load;
10390   if (ExtType == ISD::NON_EXTLOAD)
10391     Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
10392                        LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
10393                        LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
10394   else
10395     Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
10396                           LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
10397                           NewAlign, LN0->getMemOperand()->getFlags(),
10398                           LN0->getAAInfo());
10399
10400   // Replace the old load's chain with the new load's chain.
10401   WorklistRemover DeadNodes(*this);
10402   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
10403
10404   // Shift the result left, if we've swallowed a left shift.
10405   SDValue Result = Load;
10406   if (ShLeftAmt != 0) {
10407     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
10408     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
10409       ShImmTy = VT;
10410     // If the shift amount is as large as the result size (but, presumably,
10411     // no larger than the source) then the useful bits of the result are
10412     // zero; we can't simply return the shortened shift, because the result
10413     // of that operation is undefined.
10414     if (ShLeftAmt >= VT.getSizeInBits())
10415       Result = DAG.getConstant(0, DL, VT);
10416     else
10417       Result = DAG.getNode(ISD::SHL, DL, VT,
10418                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
10419   }
10420
10421   if (HasShiftedOffset) {
10422     // Recalculate the shift amount after it has been altered to calculate
10423     // the offset.
10424     if (DAG.getDataLayout().isBigEndian())
10425       ShAmt = AdjustBigEndianShift(ShAmt);
10426
10427     // We're using a shifted mask, so the load now has an offset. This means
10428     // that data has been loaded into the lower bytes than it would have been
10429     // before, so we need to shl the loaded data into the correct position in the
10430     // register.
10431     SDValue ShiftC = DAG.getConstant(ShAmt, DL, VT);
10432     Result = DAG.getNode(ISD::SHL, DL, VT, Result, ShiftC);
10433     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
10434   }
10435
10436   // Return the new loaded value.
10437   return Result;
10438 }
10439
10440 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
10441   SDValue N0 = N->getOperand(0);
10442   SDValue N1 = N->getOperand(1);
10443   EVT VT = N->getValueType(0);
10444   EVT EVT = cast<VTSDNode>(N1)->getVT();
10445   unsigned VTBits = VT.getScalarSizeInBits();
10446   unsigned EVTBits = EVT.getScalarSizeInBits();
10447
10448   if (N0.isUndef())
10449     return DAG.getUNDEF(VT);
10450
10451   // fold (sext_in_reg c1) -> c1
10452   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
10453     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
10454
10455   // If the input is already sign extended, just drop the extension.
10456   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
10457     return N0;
10458
10459   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
10460   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
10461       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
10462     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10463                        N0.getOperand(0), N1);
10464
10465   // fold (sext_in_reg (sext x)) -> (sext x)
10466   // fold (sext_in_reg (aext x)) -> (sext x)
10467   // if x is small enough or if we know that x has more than 1 sign bit and the
10468   // sign_extend_inreg is extending from one of them.
10469   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
10470     SDValue N00 = N0.getOperand(0);
10471     unsigned N00Bits = N00.getScalarValueSizeInBits();
10472     if ((N00Bits <= EVTBits ||
10473          (N00Bits - DAG.ComputeNumSignBits(N00)) < EVTBits) &&
10474         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10475       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00);
10476   }
10477
10478   // fold (sext_in_reg (*_extend_vector_inreg x)) -> (sext_vector_inreg x)
10479   if ((N0.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG ||
10480        N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ||
10481        N0.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) &&
10482       N0.getOperand(0).getScalarValueSizeInBits() == EVTBits) {
10483     if (!LegalOperations ||
10484         TLI.isOperationLegal(ISD::SIGN_EXTEND_VECTOR_INREG, VT))
10485       return DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, SDLoc(N), VT,
10486                          N0.getOperand(0));
10487   }
10488
10489   // fold (sext_in_reg (zext x)) -> (sext x)
10490   // iff we are extending the source sign bit.
10491   if (N0.getOpcode() == ISD::ZERO_EXTEND) {
10492     SDValue N00 = N0.getOperand(0);
10493     if (N00.getScalarValueSizeInBits() == EVTBits &&
10494         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
10495       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
10496   }
10497
10498   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
10499   if (DAG.MaskedValueIsZero(N0, APInt::getOneBitSet(VTBits, EVTBits - 1)))
10500     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
10501
10502   // fold operands of sext_in_reg based on knowledge that the top bits are not
10503   // demanded.
10504   if (SimplifyDemandedBits(SDValue(N, 0)))
10505     return SDValue(N, 0);
10506
10507   // fold (sext_in_reg (load x)) -> (smaller sextload x)
10508   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
10509   if (SDValue NarrowLoad = ReduceLoadWidth(N))
10510     return NarrowLoad;
10511
10512   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
10513   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
10514   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
10515   if (N0.getOpcode() == ISD::SRL) {
10516     if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
10517       if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
10518         // We can turn this into an SRA iff the input to the SRL is already sign
10519         // extended enough.
10520         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
10521         if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
10522           return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
10523                              N0.getOperand(1));
10524       }
10525   }
10526
10527   // fold (sext_inreg (extload x)) -> (sextload x)
10528   // If sextload is not supported by target, we can only do the combine when
10529   // load has one use. Doing otherwise can block folding the extload with other
10530   // extends that the target does support.
10531   if (ISD::isEXTLoad(N0.getNode()) &&
10532       ISD::isUNINDEXEDLoad(N0.getNode()) &&
10533       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10534       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
10535         N0.hasOneUse()) ||
10536        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10537     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10538     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10539                                      LN0->getChain(),
10540                                      LN0->getBasePtr(), EVT,
10541                                      LN0->getMemOperand());
10542     CombineTo(N, ExtLoad);
10543     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10544     AddToWorklist(ExtLoad.getNode());
10545     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10546   }
10547   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
10548   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
10549       N0.hasOneUse() &&
10550       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
10551       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
10552        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
10553     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10554     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
10555                                      LN0->getChain(),
10556                                      LN0->getBasePtr(), EVT,
10557                                      LN0->getMemOperand());
10558     CombineTo(N, ExtLoad);
10559     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
10560     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
10561   }
10562
10563   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
10564   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
10565     if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
10566                                            N0.getOperand(1), false))
10567       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
10568                          BSwap, N1);
10569   }
10570
10571   return SDValue();
10572 }
10573
10574 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
10575   SDValue N0 = N->getOperand(0);
10576   EVT VT = N->getValueType(0);
10577
10578   if (N0.isUndef())
10579     return DAG.getUNDEF(VT);
10580
10581   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10582     return Res;
10583
10584   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10585     return SDValue(N, 0);
10586
10587   return SDValue();
10588 }
10589
10590 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
10591   SDValue N0 = N->getOperand(0);
10592   EVT VT = N->getValueType(0);
10593
10594   if (N0.isUndef())
10595     return DAG.getUNDEF(VT);
10596
10597   if (SDValue Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes))
10598     return Res;
10599
10600   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
10601     return SDValue(N, 0);
10602
10603   return SDValue();
10604 }
10605
10606 SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
10607   SDValue N0 = N->getOperand(0);
10608   EVT VT = N->getValueType(0);
10609   EVT SrcVT = N0.getValueType();
10610   bool isLE = DAG.getDataLayout().isLittleEndian();
10611
10612   // noop truncate
10613   if (SrcVT == VT)
10614     return N0;
10615
10616   // fold (truncate (truncate x)) -> (truncate x)
10617   if (N0.getOpcode() == ISD::TRUNCATE)
10618     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10619
10620   // fold (truncate c1) -> c1
10621   if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
10622     SDValue C = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
10623     if (C.getNode() != N)
10624       return C;
10625   }
10626
10627   // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
10628   if (N0.getOpcode() == ISD::ZERO_EXTEND ||
10629       N0.getOpcode() == ISD::SIGN_EXTEND ||
10630       N0.getOpcode() == ISD::ANY_EXTEND) {
10631     // if the source is smaller than the dest, we still need an extend.
10632     if (N0.getOperand(0).getValueType().bitsLT(VT))
10633       return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
10634     // if the source is larger than the dest, than we just need the truncate.
10635     if (N0.getOperand(0).getValueType().bitsGT(VT))
10636       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
10637     // if the source and dest are the same type, we can drop both the extend
10638     // and the truncate.
10639     return N0.getOperand(0);
10640   }
10641
10642   // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
10643   if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
10644     return SDValue();
10645
10646   // Fold extract-and-trunc into a narrow extract. For example:
10647   //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
10648   //   i32 y = TRUNCATE(i64 x)
10649   //        -- becomes --
10650   //   v16i8 b = BITCAST (v2i64 val)
10651   //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
10652   //
10653   // Note: We only run this optimization after type legalization (which often
10654   // creates this pattern) and before operation legalization after which
10655   // we need to be more careful about the vector instructions that we generate.
10656   if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
10657       LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {
10658     EVT VecTy = N0.getOperand(0).getValueType();
10659     EVT ExTy = N0.getValueType();
10660     EVT TrTy = N->getValueType(0);
10661
10662     unsigned NumElem = VecTy.getVectorNumElements();
10663     unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();
10664
10665     EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
10666     assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");
10667
10668     SDValue EltNo = N0->getOperand(1);
10669     if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
10670       int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
10671       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
10672       int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
10673
10674       SDLoc DL(N);
10675       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
10676                          DAG.getBitcast(NVT, N0.getOperand(0)),
10677                          DAG.getConstant(Index, DL, IndexTy));
10678     }
10679   }
10680
10681   // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
10682   if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
10683     if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
10684         TLI.isTruncateFree(SrcVT, VT)) {
10685       SDLoc SL(N0);
10686       SDValue Cond = N0.getOperand(0);
10687       SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10688       SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
10689       return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
10690     }
10691   }
10692
10693   // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
10694   if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
10695       (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
10696       TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
10697     SDValue Amt = N0.getOperand(1);
10698     KnownBits Known = DAG.computeKnownBits(Amt);
10699     unsigned Size = VT.getScalarSizeInBits();
10700     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
10701       SDLoc SL(N);
10702       EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
10703
10704       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10705       if (AmtVT != Amt.getValueType()) {
10706         Amt = DAG.getZExtOrTrunc(Amt, SL, AmtVT);
10707         AddToWorklist(Amt.getNode());
10708       }
10709       return DAG.getNode(ISD::SHL, SL, VT, Trunc, Amt);
10710     }
10711   }
10712
10713   // Attempt to pre-truncate BUILD_VECTOR sources.
10714   if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
10715       TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
10716     SDLoc DL(N);
10717     EVT SVT = VT.getScalarType();
10718     SmallVector<SDValue, 8> TruncOps;
10719     for (const SDValue &Op : N0->op_values()) {
10720       SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
10721       TruncOps.push_back(TruncOp);
10722     }
10723     return DAG.getBuildVector(VT, DL, TruncOps);
10724   }
10725
10726   // Fold a series of buildvector, bitcast, and truncate if possible.
10727   // For example fold
10728   //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
10729   //   (2xi32 (buildvector x, y)).
10730   if (Level == AfterLegalizeVectorOps && VT.isVector() &&
10731       N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
10732       N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
10733       N0.getOperand(0).hasOneUse()) {
10734     SDValue BuildVect = N0.getOperand(0);
10735     EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
10736     EVT TruncVecEltTy = VT.getVectorElementType();
10737
10738     // Check that the element types match.
10739     if (BuildVectEltTy == TruncVecEltTy) {
10740       // Now we only need to compute the offset of the truncated elements.
10741       unsigned BuildVecNumElts =  BuildVect.getNumOperands();
10742       unsigned TruncVecNumElts = VT.getVectorNumElements();
10743       unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;
10744
10745       assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
10746              "Invalid number of elements");
10747
10748       SmallVector<SDValue, 8> Opnds;
10749       for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
10750         Opnds.push_back(BuildVect.getOperand(i));
10751
10752       return DAG.getBuildVector(VT, SDLoc(N), Opnds);
10753     }
10754   }
10755
10756   // See if we can simplify the input to this truncate through knowledge that
10757   // only the low bits are being used.
10758   // For example "trunc (or (shl x, 8), y)" // -> trunc y
10759   // Currently we only perform this optimization on scalars because vectors
10760   // may have different active low bits.
10761   if (!VT.isVector()) {
10762     APInt Mask =
10763         APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits());
10764     if (SDValue Shorter = DAG.GetDemandedBits(N0, Mask))
10765       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
10766   }
10767
10768   // fold (truncate (load x)) -> (smaller load x)
10769   // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
10770   if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
10771     if (SDValue Reduced = ReduceLoadWidth(N))
10772       return Reduced;
10773
10774     // Handle the case where the load remains an extending load even
10775     // after truncation.
10776     if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
10777       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
10778       if (!LN0->isVolatile() &&
10779           LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
10780         SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
10781                                          VT, LN0->getChain(), LN0->getBasePtr(),
10782                                          LN0->getMemoryVT(),
10783                                          LN0->getMemOperand());
10784         DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
10785         return NewLoad;
10786       }
10787     }
10788   }
10789
10790   // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
10791   // where ... are all 'undef'.
10792   if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
10793     SmallVector<EVT, 8> VTs;
10794     SDValue V;
10795     unsigned Idx = 0;
10796     unsigned NumDefs = 0;
10797
10798     for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
10799       SDValue X = N0.getOperand(i);
10800       if (!X.isUndef()) {
10801         V = X;
10802         Idx = i;
10803         NumDefs++;
10804       }
10805       // Stop if more than one members are non-undef.
10806       if (NumDefs > 1)
10807         break;
10808       VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
10809                                      VT.getVectorElementType(),
10810                                      X.getValueType().getVectorNumElements()));
10811     }
10812
10813     if (NumDefs == 0)
10814       return DAG.getUNDEF(VT);
10815
10816     if (NumDefs == 1) {
10817       assert(V.getNode() && "The single defined operand is empty!");
10818       SmallVector<SDValue, 8> Opnds;
10819       for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
10820         if (i != Idx) {
10821           Opnds.push_back(DAG.getUNDEF(VTs[i]));
10822           continue;
10823         }
10824         SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
10825         AddToWorklist(NV.getNode());
10826         Opnds.push_back(NV);
10827       }
10828       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
10829     }
10830   }
10831
10832   // Fold truncate of a bitcast of a vector to an extract of the low vector
10833   // element.
10834   //
10835   // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, idx
10836   if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
10837     SDValue VecSrc = N0.getOperand(0);
10838     EVT SrcVT = VecSrc.getValueType();
10839     if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
10840         (!LegalOperations ||
10841          TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
10842       SDLoc SL(N);
10843
10844       EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
10845       unsigned Idx = isLE ? 0 : SrcVT.getVectorNumElements() - 1;
10846       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
10847                          VecSrc, DAG.getConstant(Idx, SL, IdxVT));
10848     }
10849   }
10850
10851   // Simplify the operands using demanded-bits information.
10852   if (!VT.isVector() &&
10853       SimplifyDemandedBits(SDValue(N, 0)))
10854     return SDValue(N, 0);
10855
10856   // (trunc adde(X, Y, Carry)) -> (adde trunc(X), trunc(Y), Carry)
10857   // (trunc addcarry(X, Y, Carry)) -> (addcarry trunc(X), trunc(Y), Carry)
10858   // When the adde's carry is not used.
10859   if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
10860       N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
10861       // We only do for addcarry before legalize operation
10862       ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
10863        TLI.isOperationLegal(N0.getOpcode(), VT))) {
10864     SDLoc SL(N);
10865     auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
10866     auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
10867     auto VTs = DAG.getVTList(VT, N0->getValueType(1));
10868     return DAG.getNode(N0.getOpcode(), SL, VTs, X, Y, N0.getOperand(2));
10869   }
10870
10871   // fold (truncate (extract_subvector(ext x))) ->
10872   //      (extract_subvector x)
10873   // TODO: This can be generalized to cover cases where the truncate and extract
10874   // do not fully cancel each other out.
10875   if (!LegalTypes && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
10876     SDValue N00 = N0.getOperand(0);
10877     if (N00.getOpcode() == ISD::SIGN_EXTEND ||
10878         N00.getOpcode() == ISD::ZERO_EXTEND ||
10879         N00.getOpcode() == ISD::ANY_EXTEND) {
10880       if (N00.getOperand(0)->getValueType(0).getVectorElementType() ==
10881           VT.getVectorElementType())
10882         return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N0->getOperand(0)), VT,
10883                            N00.getOperand(0), N0.getOperand(1));
10884     }
10885   }
10886
10887   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
10888     return NewVSel;
10889
10890   // Narrow a suitable binary operation with a non-opaque constant operand by
10891   // moving it ahead of the truncate. This is limited to pre-legalization
10892   // because targets may prefer a wider type during later combines and invert
10893   // this transform.
10894   switch (N0.getOpcode()) {
10895   case ISD::ADD:
10896   case ISD::SUB:
10897   case ISD::MUL:
10898   case ISD::AND:
10899   case ISD::OR:
10900   case ISD::XOR:
10901     if (!LegalOperations && N0.hasOneUse() &&
10902         (isConstantOrConstantVector(N0.getOperand(0), true) ||
10903          isConstantOrConstantVector(N0.getOperand(1), true))) {
10904       // TODO: We already restricted this to pre-legalization, but for vectors
10905       // we are extra cautious to not create an unsupported operation.
10906       // Target-specific changes are likely needed to avoid regressions here.
10907       if (VT.isScalarInteger() || TLI.isOperationLegal(N0.getOpcode(), VT)) {
10908         SDLoc DL(N);
10909         SDValue NarrowL = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(0));
10910         SDValue NarrowR = DAG.getNode(ISD::TRUNCATE, DL, VT, N0.getOperand(1));
10911         return DAG.getNode(N0.getOpcode(), DL, VT, NarrowL, NarrowR);
10912       }
10913     }
10914   }
10915
10916   return SDValue();
10917 }
10918
10919 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
10920   SDValue Elt = N->getOperand(i);
10921   if (Elt.getOpcode() != ISD::MERGE_VALUES)
10922     return Elt.getNode();
10923   return Elt.getOperand(Elt.getResNo()).getNode();
10924 }
10925
10926 /// build_pair (load, load) -> load
10927 /// if load locations are consecutive.
10928 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
10929   assert(N->getOpcode() == ISD::BUILD_PAIR);
10930
10931   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
10932   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
10933
10934   // A BUILD_PAIR is always having the least significant part in elt 0 and the
10935   // most significant part in elt 1. So when combining into one large load, we
10936   // need to consider the endianness.
10937   if (DAG.getDataLayout().isBigEndian())
10938     std::swap(LD1, LD2);
10939
10940   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
10941       LD1->getAddressSpace() != LD2->getAddressSpace())
10942     return SDValue();
10943   EVT LD1VT = LD1->getValueType(0);
10944   unsigned LD1Bytes = LD1VT.getStoreSize();
10945   if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
10946       DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
10947     unsigned Align = LD1->getAlignment();
10948     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
10949         VT.getTypeForEVT(*DAG.getContext()));
10950
10951     if (NewAlign <= Align &&
10952         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
10953       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(),
10954                          LD1->getPointerInfo(), Align);
10955   }
10956
10957   return SDValue();
10958 }
10959
10960 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
10961   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
10962   // and Lo parts; on big-endian machines it doesn't.
10963   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
10964 }
10965
10966 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
10967                                     const TargetLowering &TLI) {
10968   // If this is not a bitcast to an FP type or if the target doesn't have
10969   // IEEE754-compliant FP logic, we're done.
10970   EVT VT = N->getValueType(0);
10971   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
10972     return SDValue();
10973
10974   // TODO: Handle cases where the integer constant is a different scalar
10975   // bitwidth to the FP.
10976   SDValue N0 = N->getOperand(0);
10977   EVT SourceVT = N0.getValueType();
10978   if (VT.getScalarSizeInBits() != SourceVT.getScalarSizeInBits())
10979     return SDValue();
10980
10981   unsigned FPOpcode;
10982   APInt SignMask;
10983   switch (N0.getOpcode()) {
10984   case ISD::AND:
10985     FPOpcode = ISD::FABS;
10986     SignMask = ~APInt::getSignMask(SourceVT.getScalarSizeInBits());
10987     break;
10988   case ISD::XOR:
10989     FPOpcode = ISD::FNEG;
10990     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10991     break;
10992   case ISD::OR:
10993     FPOpcode = ISD::FABS;
10994     SignMask = APInt::getSignMask(SourceVT.getScalarSizeInBits());
10995     break;
10996   default:
10997     return SDValue();
10998   }
10999
11000   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
11001   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
11002   // Fold (bitcast int (or (bitcast fp X to int), 0x8000...) to fp) ->
11003   //   fneg (fabs X)
11004   SDValue LogicOp0 = N0.getOperand(0);
11005   ConstantSDNode *LogicOp1 = isConstOrConstSplat(N0.getOperand(1), true);
11006   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
11007       LogicOp0.getOpcode() == ISD::BITCAST &&
11008       LogicOp0.getOperand(0).getValueType() == VT) {
11009     SDValue FPOp = DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0.getOperand(0));
11010     NumFPLogicOpsConv++;
11011     if (N0.getOpcode() == ISD::OR)
11012       return DAG.getNode(ISD::FNEG, SDLoc(N), VT, FPOp);
11013     return FPOp;
11014   }
11015
11016   return SDValue();
11017 }
11018
/// Combine a BITCAST node.
///
/// \p N is the bitcast being visited; N0 below is its operand 0 and VT its
/// result type. The folds are tried in the order written and the first one
/// that applies returns the replacement value. An empty SDValue is returned
/// when no fold applies.
11019 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
11020   SDValue N0 = N->getOperand(0);
11021   EVT VT = N->getValueType(0);
11022
        // bitcast(undef) -> undef of the result type.
11023   if (N0.isUndef())
11024     return DAG.getUNDEF(VT);
11025
11026   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
11027   // Only do this before legalize types, unless both types are integer and the
11028   // scalar type is legal. Only do this before legalize ops, since the target
11029   // may be depending on the bitcast.
11030   // First check to see if this is all constant.
11031   // TODO: Support FP bitcasts after legalize types.
11032   if (VT.isVector() &&
11033       (!LegalTypes ||
11034        (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
11035         TLI.isTypeLegal(VT.getVectorElementType()))) &&
11036       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
11037       cast<BuildVectorSDNode>(N0)->isConstant())
11038     return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
11039                                              VT.getVectorElementType());
11040
11041   // If the input is a constant, let getNode fold it.
11042   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
11043     // If we can't allow illegal operations, we need to check that this is just
11044     // an fp -> int or int -> fp conversion and that the resulting operation
11045     // will be legal.
11046     if (!LegalOperations ||
11047         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
11048          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
11049         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
11050          TLI.isOperationLegal(ISD::Constant, VT))) {
11051       SDValue C = DAG.getBitcast(VT, N0);
            // Only report a change if getBitcast produced something other than
            // N itself.
11052       if (C.getNode() != N)
11053         return C;
11054     }
11055   }
11056
11057   // (conv (conv x, t1), t2) -> (conv x, t2)
11058   if (N0.getOpcode() == ISD::BITCAST)
11059     return DAG.getBitcast(VT, N0.getOperand(0));
11060
11061   // fold (conv (load x)) -> (load (conv*)x)
11062   // If the resultant load doesn't need a higher alignment than the original!
11063   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
11064       // Do not remove the cast if the types differ in endian layout.
11065       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
11066           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
11067       // If the load is volatile, we only want to change the load type if the
11068       // resulting load is legal. Otherwise we might increase the number of
11069       // memory accesses. We don't care if the original type was legal or not
11070       // as we assume software couldn't rely on the number of accesses of an
11071       // illegal type.
11072       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
11073        TLI.isOperationLegal(ISD::LOAD, VT))) {
11074     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
11075
          // The target has the final say on whether loading VT directly is
          // preferable to loading the original type and bitcasting.
11076     if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
11077                                     *LN0->getMemOperand())) {
            // The new load keeps the chain, base pointer, pointer info,
            // alignment, memory-operand flags and AA info of the old one; only
            // the loaded type changes.
11078       SDValue Load =
11079           DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
11080                       LN0->getPointerInfo(), LN0->getAlignment(),
11081                       LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
            // Move the users of the old load's result #1 over to result #1 of
            // the new load.
11082       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
11083       return Load;
11084     }
11085   }
11086
        // Integer sign-bit logic on a bitcast FP value -> fabs / fneg.
11087   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
11088     return V;
11089
11090   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
11091   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
11092   //
11093   // For ppc_fp128:
11094   // fold (bitcast (fneg x)) ->
11095   //     flipbit = signbit
11096   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11097   //
11098   // fold (bitcast (fabs x)) ->
11099   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
11100   //     (xor (bitcast x) (build_pair flipbit, flipbit))
11101   // This often reduces constant pool loads.
11102   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
11103        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
11104       N0.getNode()->hasOneUse() && VT.isInteger() &&
11105       !VT.isVector() && !N0.getValueType().isVector()) {
          // Bitcast the FP operand itself; the sign manipulation is then done
          // on the integer value.
11106     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
11107     AddToWorklist(NewConv.getNode());
11108
11109     SDLoc DL(N);
11110     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11111       assert(VT.getSizeInBits() == 128);
            // The 128-bit value is treated as two i64 halves: SignBit is the
            // sign mask of one half, and the same FlipBit is XORed into both
            // halves through a BUILD_PAIR.
11112       SDValue SignBit = DAG.getConstant(
11113           APInt::getSignMask(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
11114       SDValue FlipBit;
11115       if (N0.getOpcode() == ISD::FNEG) {
              // fneg: always flip the sign bit of each half.
11116         FlipBit = SignBit;
11117         AddToWorklist(FlipBit.getNode());
11118       } else {
11119         assert(N0.getOpcode() == ISD::FABS);
              // fabs: flip only if the sign bit of the Hi half is currently set.
11120         SDValue Hi =
11121             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
11122                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11123                                               SDLoc(NewConv)));
11124         AddToWorklist(Hi.getNode());
11125         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
11126         AddToWorklist(FlipBit.getNode());
11127       }
11128       SDValue FlipBits =
11129           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11130       AddToWorklist(FlipBits.getNode());
11131       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
11132     }
          // Generic scalar case: fneg XORs the sign bit, fabs ANDs with the
          // inverted sign mask.
11133     APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11134     if (N0.getOpcode() == ISD::FNEG)
11135       return DAG.getNode(ISD::XOR, DL, VT,
11136                          NewConv, DAG.getConstant(SignBit, DL, VT));
11137     assert(N0.getOpcode() == ISD::FABS);
11138     return DAG.getNode(ISD::AND, DL, VT,
11139                        NewConv, DAG.getConstant(~SignBit, DL, VT));
11140   }
11141
11142   // fold (bitconvert (fcopysign cst, x)) ->
11143   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
11144   // Note that we don't handle (copysign x, cst) because this can always be
11145   // folded to an fneg or fabs.
11146   //
11147   // For ppc_fp128:
11148   // fold (bitcast (fcopysign cst, x)) ->
11149   //     flipbit = (and (extract_element
11150   //                     (xor (bitcast cst), (bitcast x)), 0),
11151   //                    signbit)
11152   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
11153   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
11154       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
11155       VT.isInteger() && !VT.isVector()) {
          // The sign source (operand 1) may have a different width than the
          // result; bitcast it to an integer of its own width first.
11156     unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
11157     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
11158     if (isTypeLegal(IntXVT)) {
11159       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
11160       AddToWorklist(X.getNode());
11161
11162       // If X has a different width than the result/lhs, sext it or truncate it.
11163       unsigned VTWidth = VT.getSizeInBits();
11164       if (OrigXWidth < VTWidth) {
11165         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
11166         AddToWorklist(X.getNode());
11167       } else if (OrigXWidth > VTWidth) {
11168         // To get the sign bit in the right place, we have to shift it right
11169         // before truncating.
11170         SDLoc DL(X);
11171         X = DAG.getNode(ISD::SRL, DL,
11172                         X.getValueType(), X,
11173                         DAG.getConstant(OrigXWidth-VTWidth, DL,
11174                                         X.getValueType()));
11175         AddToWorklist(X.getNode());
11176         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
11177         AddToWorklist(X.getNode());
11178       }
11179
11180       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
11181         APInt SignBit = APInt::getSignMask(VT.getSizeInBits() / 2);
11182         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11183         AddToWorklist(Cst.getNode());
              // NOTE: this X shadows the X computed above; the ppcf128 path
              // works on a fresh bitcast of the sign source to VT.
11184         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
11185         AddToWorklist(X.getNode());
              // The signs differ exactly when the sign bit of the Hi half of
              // (cst ^ x) is set; that bit is then flipped in both halves of
              // cst.
11186         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
11187         AddToWorklist(XorResult.getNode());
11188         SDValue XorResult64 = DAG.getNode(
11189             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
11190             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
11191                                   SDLoc(XorResult)));
11192         AddToWorklist(XorResult64.getNode());
11193         SDValue FlipBit =
11194             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
11195                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
11196         AddToWorklist(FlipBit.getNode());
11197         SDValue FlipBits =
11198             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
11199         AddToWorklist(FlipBits.getNode());
11200         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
11201       }
            // Generic case: keep only the sign bit of X ...
11202       APInt SignBit = APInt::getSignMask(VT.getSizeInBits());
11203       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
11204                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
11205       AddToWorklist(X.getNode());
11206
            // ... keep everything but the sign bit of the constant ...
11207       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
11208       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
11209                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
11210       AddToWorklist(Cst.getNode());
11211
            // ... and merge the two.
11212       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
11213     }
11214   }
11215
11216   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
11217   if (N0.getOpcode() == ISD::BUILD_PAIR)
11218     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
11219       return CombineLD;
11220
11221   // Remove double bitcasts from shuffles - this is often a legacy of
11222   // XformToShuffleWithZero being used to combine bitmaskings (of
11223   // float vectors bitcast to integer vectors) into shuffles.
11224   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
        // Only done when VT has at least as many elements as the shuffle type
        // and the element count of VT is a multiple of the shuffle's.
11225   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
11226       N0->getOpcode() == ISD::VECTOR_SHUFFLE && N0.hasOneUse() &&
11227       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
11228       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
11229     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
11230
11231     // If operands are a bitcast, peek through if it casts the original VT.
11232     // If operands are a constant, just bitcast back to original VT.
          // Any other operand yields an empty SDValue.
11233     auto PeekThroughBitcast = [&](SDValue Op) {
11234       if (Op.getOpcode() == ISD::BITCAST &&
11235           Op.getOperand(0).getValueType() == VT)
11236         return SDValue(Op.getOperand(0));
11237       if (Op.isUndef() || ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
11238           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
11239         return DAG.getBitcast(VT, Op);
11240       return SDValue();
11241     };
11242
11243     // FIXME: If either input vector is bitcast, try to convert the shuffle to
11244     // the result type of this bitcast. This would eliminate at least one
11245     // bitcast. See the transform in InstCombine.
11246     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
11247     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
          // Give up unless both shuffle inputs could be expressed in VT.
11248     if (!(SV0 && SV1))
11249       return SDValue();
11250
          // Each element of the original mask expands to MaskScale consecutive
          // elements of the new mask; negative (undef) entries stay -1.
11251     int MaskScale =
11252         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
11253     SmallVector<int, 8> NewMask;
11254     for (int M : SVN->getMask())
11255       for (int i = 0; i != MaskScale; ++i)
11256         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
11257
11258     SDValue LegalShuffle =
11259         TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
11260     if (LegalShuffle)
11261       return LegalShuffle;
11262   }
11263
11264   return SDValue();
11265 }
11266
11267 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
11268   EVT VT = N->getValueType(0);
11269   return CombineConsecutiveLoads(N, VT);
11270 }
11271
11272 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
11273 /// operands. DstEltVT indicates the destination element value type.
11274 SDValue DAGCombiner::
11275 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
11276   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
11277
11278   // If this is already the right type, we're done.
11279   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
11280
11281   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
11282   unsigned DstBitSize = DstEltVT.getSizeInBits();
11283
11284   // If this is a conversion of N elements of one type to N elements of another
11285   // type, convert each element.  This handles FP<->INT cases.
11286   if (SrcBitSize == DstBitSize) {
11287     SmallVector<SDValue, 8> Ops;
11288     for (SDValue Op : BV->op_values()) {
11289       // If the vector element type is not legal, the BUILD_VECTOR operands
11290       // are promoted and implicitly truncated.  Make that explicit here.
11291       if (Op.getValueType() != SrcEltVT)
11292         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
11293       Ops.push_back(DAG.getBitcast(DstEltVT, Op));
11294       AddToWorklist(Ops.back().getNode());
11295     }
11296     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11297                               BV->getValueType(0).getVectorNumElements());
11298     return DAG.getBuildVector(VT, SDLoc(BV), Ops);
11299   }
11300
11301   // Otherwise, we're growing or shrinking the elements.  To avoid having to
11302   // handle annoying details of growing/shrinking FP values, we convert them to
11303   // int first.
11304   if (SrcEltVT.isFloatingPoint()) {
11305     // Convert the input float vector to a int vector where the elements are the
11306     // same sizes.
11307     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
11308     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
11309     SrcEltVT = IntVT;
11310   }
11311
11312   // Now we know the input is an integer vector.  If the output is a FP type,
11313   // convert to integer first, then to FP of the right size.
11314   if (DstEltVT.isFloatingPoint()) {
11315     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
11316     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
11317
11318     // Next, convert to FP elements of the same size.
11319     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
11320   }
11321
11322   SDLoc DL(BV);
11323
11324   // Okay, we know the src/dst types are both integers of differing types.
11325   // Handling growing first.
11326   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
11327   if (SrcBitSize < DstBitSize) {
11328     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
11329
11330     SmallVector<SDValue, 8> Ops;
11331     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
11332          i += NumInputsPerOutput) {
11333       bool isLE = DAG.getDataLayout().isLittleEndian();
11334       APInt NewBits = APInt(DstBitSize, 0);
11335       bool EltIsUndef = true;
11336       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
11337         // Shift the previously computed bits over.
11338         NewBits <<= SrcBitSize;
11339         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
11340         if (Op.isUndef()) continue;
11341         EltIsUndef = false;
11342
11343         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
11344                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
11345       }
11346
11347       if (EltIsUndef)
11348         Ops.push_back(DAG.getUNDEF(DstEltVT));
11349       else
11350         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
11351     }
11352
11353     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
11354     return DAG.getBuildVector(VT, DL, Ops);
11355   }
11356
11357   // Finally, this must be the case where we are shrinking elements: each input
11358   // turns into multiple outputs.
11359   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
11360   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
11361                             NumOutputsPerInput*BV->getNumOperands());
11362   SmallVector<SDValue, 8> Ops;
11363
11364   for (const SDValue &Op : BV->op_values()) {
11365     if (Op.isUndef()) {
11366       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
11367       continue;
11368     }
11369
11370     APInt OpVal = cast<ConstantSDNode>(Op)->
11371                   getAPIntValue().zextOrTrunc(SrcBitSize);
11372
11373     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
11374       APInt ThisVal = OpVal.trunc(DstBitSize);
11375       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
11376       OpVal.lshrInPlace(DstBitSize);
11377     }
11378
11379     // For big endian targets, swap the order of the pieces of each element.
11380     if (DAG.getDataLayout().isBigEndian())
11381       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
11382   }
11383
11384   return DAG.getBuildVector(VT, DL, Ops);
11385 }
11386
11387 static bool isContractable(SDNode *N) {
11388   SDNodeFlags F = N->getFlags();
11389   return F.hasAllowContract() || F.hasAllowReassociation();
11390 }
11391
11392 /// Try to perform FMA combining on a given FADD node.
///
/// \p N is the FADD. When one of its operands is (or, through FP_EXTEND or a
/// nested fused node, contains) a fusable FMUL, the add and the multiply are
/// rewritten into the preferred fused opcode (FMAD if available, else FMA).
/// The patterns are tried in the order written; the first match returns the
/// new node. Returns an empty SDValue when fusion is not allowed or nothing
/// matches.
11393 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
11394   SDValue N0 = N->getOperand(0);
11395   SDValue N1 = N->getOperand(1);
11396   EVT VT = N->getValueType(0);
11397   SDLoc SL(N);
11398
11399   const TargetOptions &Options = DAG.getTarget().Options;
11400
11401   // Floating-point multiply-add with intermediate rounding.
11402   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11403
11404   // Floating-point multiply-add without intermediate rounding.
11405   bool HasFMA =
11406       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11407       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11408
11409   // No valid opcode, do not combine.
11410   if (!HasFMAD && !HasFMA)
11411     return SDValue();
11412
        // Flags of the FADD; they are attached to every fused node built below.
11413   SDNodeFlags Flags = N->getFlags();
11414   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11415   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11416                               CanFuse || HasFMAD);
11417   // If the addition is not contractable, do not combine.
        // Note: CanFuse already includes isContractable(N), so whenever
        // AllowFusionGlobally is false the second test below is true as well.
11418   if (!AllowFusionGlobally && !isContractable(N))
11419     return SDValue();
11420
        // Do nothing here when the subtarget's generateFMAsInMachineCombiner
        // hook returns true for this optimization level.
11421   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11422   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11423     return SDValue();
11424
11425   // Always prefer FMAD to FMA for precision.
11426   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11427   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11428
11429   // Is the node an FMUL and contractable either due to global flags or
11430   // SDNodeFlags.
11431   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11432     if (N.getOpcode() != ISD::FMUL)
11433       return false;
11434     return AllowFusionGlobally || isContractable(N.getNode());
11435   };
11436   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
11437   // prefer to fold the multiply with fewer uses.
11438   if (Aggressive && isContractableFMUL(N0) && isContractableFMUL(N1)) {
          // After the swap N0 is the multiply with fewer uses, so it is the
          // one matched by the first fold below.
11439     if (N0.getNode()->use_size() > N1.getNode()->use_size())
11440       std::swap(N0, N1);
11441   }
11442
11443   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
        // Unless the target asks for aggressive fusion, the multiply must have
        // exactly one use.
11444   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11445     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11446                        N0.getOperand(0), N0.getOperand(1), N1, Flags);
11447   }
11448
11449   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
11450   // Note: Commutes FADD operands.
11451   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11452     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11453                        N1.getOperand(0), N1.getOperand(1), N0, Flags);
11454   }
11455
11456   // Look through FP_EXTEND nodes to do more combining.
11457
11458   // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
        // The target must report the extension as foldable into the fused op.
11459   if (N0.getOpcode() == ISD::FP_EXTEND) {
11460     SDValue N00 = N0.getOperand(0);
11461     if (isContractableFMUL(N00) &&
11462         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11463       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11464                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11465                                      N00.getOperand(0)),
11466                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11467                                      N00.getOperand(1)), N1, Flags);
11468     }
11469   }
11470
11471   // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
11472   // Note: Commutes FADD operands.
11473   if (N1.getOpcode() == ISD::FP_EXTEND) {
11474     SDValue N10 = N1.getOperand(0);
11475     if (isContractableFMUL(N10) &&
11476         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11477       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11478                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11479                                      N10.getOperand(0)),
11480                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11481                                      N10.getOperand(1)), N0, Flags);
11482     }
11483   }
11484
11485   // More folding opportunities when target permits.
11486   if (Aggressive) {
11487     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, z))
          // This fold and the next one test for a plain FMUL rather than using
          // isContractableFMUL; they are gated on CanFuse and on both the
          // fused node and the inner multiply having a single use.
11488     if (CanFuse &&
11489         N0.getOpcode() == PreferredFusedOpcode &&
11490         N0.getOperand(2).getOpcode() == ISD::FMUL &&
11491         N0->hasOneUse() && N0.getOperand(2)->hasOneUse()) {
11492       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11493                          N0.getOperand(0), N0.getOperand(1),
11494                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11495                                      N0.getOperand(2).getOperand(0),
11496                                      N0.getOperand(2).getOperand(1),
11497                                      N1, Flags), Flags);
11498     }
11499
11500     // fold (fadd x, (fma y, z, (fmul u, v))) -> (fma y, z, (fma u, v, x))
11501     if (CanFuse &&
11502         N1->getOpcode() == PreferredFusedOpcode &&
11503         N1.getOperand(2).getOpcode() == ISD::FMUL &&
11504         N1->hasOneUse() && N1.getOperand(2)->hasOneUse()) {
11505       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11506                          N1.getOperand(0), N1.getOperand(1),
11507                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11508                                      N1.getOperand(2).getOperand(0),
11509                                      N1.getOperand(2).getOperand(1),
11510                                      N0, Flags), Flags);
11511     }
11512
11513
11514     // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
11515     //   -> (fma x, y, (fma (fpext u), (fpext v), z))
          // Helper building that result; it is shared with the commuted form
          // further down.
11516     auto FoldFAddFMAFPExtFMul = [&] (
11517       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11518       SDNodeFlags Flags) {
11519       return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
11520                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11521                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11522                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11523                                      Z, Flags), Flags);
11524     };
11525     if (N0.getOpcode() == PreferredFusedOpcode) {
11526       SDValue N02 = N0.getOperand(2);
11527       if (N02.getOpcode() == ISD::FP_EXTEND) {
11528         SDValue N020 = N02.getOperand(0);
11529         if (isContractableFMUL(N020) &&
11530             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11531           return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
11532                                       N020.getOperand(0), N020.getOperand(1),
11533                                       N1, Flags);
11534         }
11535       }
11536     }
11537
11538     // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
11539     //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
11540     // FIXME: This turns two single-precision and one double-precision
11541     // operation into two double-precision operations, which might not be
11542     // interesting for all targets, especially GPUs.
          // Helper building that result; it is shared with the commuted form
          // further down.
11543     auto FoldFAddFPExtFMAFMul = [&] (
11544       SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z,
11545       SDNodeFlags Flags) {
11546       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11547                          DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
11548                          DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
11549                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11550                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
11551                                      DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
11552                                      Z, Flags), Flags);
11553     };
11554     if (N0.getOpcode() == ISD::FP_EXTEND) {
11555       SDValue N00 = N0.getOperand(0);
11556       if (N00.getOpcode() == PreferredFusedOpcode) {
11557         SDValue N002 = N00.getOperand(2);
11558         if (isContractableFMUL(N002) &&
11559             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11560           return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
11561                                       N002.getOperand(0), N002.getOperand(1),
11562                                       N1, Flags);
11563         }
11564       }
11565     }
11566
11567     // fold (fadd x, (fma y, z, (fpext (fmul u, v))))
11568     //   -> (fma y, z, (fma (fpext u), (fpext v), x))
11569     if (N1.getOpcode() == PreferredFusedOpcode) {
11570       SDValue N12 = N1.getOperand(2);
11571       if (N12.getOpcode() == ISD::FP_EXTEND) {
11572         SDValue N120 = N12.getOperand(0);
11573         if (isContractableFMUL(N120) &&
11574             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11575           return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
11576                                       N120.getOperand(0), N120.getOperand(1),
11577                                       N0, Flags);
11578         }
11579       }
11580     }
11581
11582     // fold (fadd x, (fpext (fma y, z, (fmul u, v))))
11583     //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
11584     // FIXME: This turns two single-precision and one double-precision
11585     // operation into two double-precision operations, which might not be
11586     // interesting for all targets, especially GPUs.
11587     if (N1.getOpcode() == ISD::FP_EXTEND) {
11588       SDValue N10 = N1.getOperand(0);
11589       if (N10.getOpcode() == PreferredFusedOpcode) {
11590         SDValue N102 = N10.getOperand(2);
11591         if (isContractableFMUL(N102) &&
11592             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11593           return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
11594                                       N102.getOperand(0), N102.getOperand(1),
11595                                       N0, Flags);
11596         }
11597       }
11598     }
11599   }
11600
11601   return SDValue();
11602 }
11603
11604 /// Try to perform FMA combining on a given FSUB node.
11605 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
11606   SDValue N0 = N->getOperand(0);
11607   SDValue N1 = N->getOperand(1);
11608   EVT VT = N->getValueType(0);
11609   SDLoc SL(N);
11610
11611   const TargetOptions &Options = DAG.getTarget().Options;
11612   // Floating-point multiply-add with intermediate rounding.
11613   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11614
11615   // Floating-point multiply-add without intermediate rounding.
11616   bool HasFMA =
11617       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11618       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11619
11620   // No valid opcode, do not combine.
11621   if (!HasFMAD && !HasFMA)
11622     return SDValue();
11623
11624   const SDNodeFlags Flags = N->getFlags();
11625   bool CanFuse = Options.UnsafeFPMath || isContractable(N);
11626   bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast ||
11627                               CanFuse || HasFMAD);
11628
11629   // If the subtraction is not contractable, do not combine.
11630   if (!AllowFusionGlobally && !isContractable(N))
11631     return SDValue();
11632
11633   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
11634   if (STI && STI->generateFMAsInMachineCombiner(OptLevel))
11635     return SDValue();
11636
11637   // Always prefer FMAD to FMA for precision.
11638   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11639   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11640
11641   // Is the node an FMUL and contractable either due to global flags or
11642   // SDNodeFlags.
11643   auto isContractableFMUL = [AllowFusionGlobally](SDValue N) {
11644     if (N.getOpcode() != ISD::FMUL)
11645       return false;
11646     return AllowFusionGlobally || isContractable(N.getNode());
11647   };
11648
11649   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
11650   if (isContractableFMUL(N0) && (Aggressive || N0->hasOneUse())) {
11651     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11652                        N0.getOperand(0), N0.getOperand(1),
11653                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11654   }
11655
11656   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
11657   // Note: Commutes FSUB operands.
11658   if (isContractableFMUL(N1) && (Aggressive || N1->hasOneUse())) {
11659     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11660                        DAG.getNode(ISD::FNEG, SL, VT,
11661                                    N1.getOperand(0)),
11662                        N1.getOperand(1), N0, Flags);
11663   }
11664
11665   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
11666   if (N0.getOpcode() == ISD::FNEG && isContractableFMUL(N0.getOperand(0)) &&
11667       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
11668     SDValue N00 = N0.getOperand(0).getOperand(0);
11669     SDValue N01 = N0.getOperand(0).getOperand(1);
11670     return DAG.getNode(PreferredFusedOpcode, SL, VT,
11671                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
11672                        DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11673   }
11674
11675   // Look through FP_EXTEND nodes to do more combining.
11676
11677   // fold (fsub (fpext (fmul x, y)), z)
11678   //   -> (fma (fpext x), (fpext y), (fneg z))
11679   if (N0.getOpcode() == ISD::FP_EXTEND) {
11680     SDValue N00 = N0.getOperand(0);
11681     if (isContractableFMUL(N00) &&
11682         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11683       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11684                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11685                                      N00.getOperand(0)),
11686                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11687                                      N00.getOperand(1)),
11688                          DAG.getNode(ISD::FNEG, SL, VT, N1), Flags);
11689     }
11690   }
11691
11692   // fold (fsub x, (fpext (fmul y, z)))
11693   //   -> (fma (fneg (fpext y)), (fpext z), x)
11694   // Note: Commutes FSUB operands.
11695   if (N1.getOpcode() == ISD::FP_EXTEND) {
11696     SDValue N10 = N1.getOperand(0);
11697     if (isContractableFMUL(N10) &&
11698         TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N10.getValueType())) {
11699       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11700                          DAG.getNode(ISD::FNEG, SL, VT,
11701                                      DAG.getNode(ISD::FP_EXTEND, SL, VT,
11702                                                  N10.getOperand(0))),
11703                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11704                                      N10.getOperand(1)),
11705                          N0, Flags);
11706     }
11707   }
11708
11709   // fold (fsub (fpext (fneg (fmul, x, y))), z)
11710   //   -> (fneg (fma (fpext x), (fpext y), z))
11711   // Note: This could be removed with appropriate canonicalization of the
11712   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11713   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11714   // from implementing the canonicalization in visitFSUB.
11715   if (N0.getOpcode() == ISD::FP_EXTEND) {
11716     SDValue N00 = N0.getOperand(0);
11717     if (N00.getOpcode() == ISD::FNEG) {
11718       SDValue N000 = N00.getOperand(0);
11719       if (isContractableFMUL(N000) &&
11720           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11721         return DAG.getNode(ISD::FNEG, SL, VT,
11722                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11723                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11724                                                    N000.getOperand(0)),
11725                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11726                                                    N000.getOperand(1)),
11727                                        N1, Flags));
11728       }
11729     }
11730   }
11731
11732   // fold (fsub (fneg (fpext (fmul, x, y))), z)
11733   //   -> (fneg (fma (fpext x)), (fpext y), z)
11734   // Note: This could be removed with appropriate canonicalization of the
11735   // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
11736   // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
11737   // from implementing the canonicalization in visitFSUB.
11738   if (N0.getOpcode() == ISD::FNEG) {
11739     SDValue N00 = N0.getOperand(0);
11740     if (N00.getOpcode() == ISD::FP_EXTEND) {
11741       SDValue N000 = N00.getOperand(0);
11742       if (isContractableFMUL(N000) &&
11743           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N000.getValueType())) {
11744         return DAG.getNode(ISD::FNEG, SL, VT,
11745                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11746                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11747                                                    N000.getOperand(0)),
11748                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11749                                                    N000.getOperand(1)),
11750                                        N1, Flags));
11751       }
11752     }
11753   }
11754
11755   // More folding opportunities when target permits.
11756   if (Aggressive) {
11757     // fold (fsub (fma x, y, (fmul u, v)), z)
11758     //   -> (fma x, y (fma u, v, (fneg z)))
11759     if (CanFuse && N0.getOpcode() == PreferredFusedOpcode &&
11760         isContractableFMUL(N0.getOperand(2)) && N0->hasOneUse() &&
11761         N0.getOperand(2)->hasOneUse()) {
11762       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11763                          N0.getOperand(0), N0.getOperand(1),
11764                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11765                                      N0.getOperand(2).getOperand(0),
11766                                      N0.getOperand(2).getOperand(1),
11767                                      DAG.getNode(ISD::FNEG, SL, VT,
11768                                                  N1), Flags), Flags);
11769     }
11770
11771     // fold (fsub x, (fma y, z, (fmul u, v)))
11772     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
11773     if (CanFuse && N1.getOpcode() == PreferredFusedOpcode &&
11774         isContractableFMUL(N1.getOperand(2))) {
11775       SDValue N20 = N1.getOperand(2).getOperand(0);
11776       SDValue N21 = N1.getOperand(2).getOperand(1);
11777       return DAG.getNode(PreferredFusedOpcode, SL, VT,
11778                          DAG.getNode(ISD::FNEG, SL, VT,
11779                                      N1.getOperand(0)),
11780                          N1.getOperand(1),
11781                          DAG.getNode(PreferredFusedOpcode, SL, VT,
11782                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
11783                                      N21, N0, Flags), Flags);
11784     }
11785
11786
11787     // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
11788     //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
11789     if (N0.getOpcode() == PreferredFusedOpcode) {
11790       SDValue N02 = N0.getOperand(2);
11791       if (N02.getOpcode() == ISD::FP_EXTEND) {
11792         SDValue N020 = N02.getOperand(0);
11793         if (isContractableFMUL(N020) &&
11794             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N020.getValueType())) {
11795           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11796                              N0.getOperand(0), N0.getOperand(1),
11797                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11798                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11799                                                      N020.getOperand(0)),
11800                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11801                                                      N020.getOperand(1)),
11802                                          DAG.getNode(ISD::FNEG, SL, VT,
11803                                                      N1), Flags), Flags);
11804         }
11805       }
11806     }
11807
11808     // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
11809     //   -> (fma (fpext x), (fpext y),
11810     //           (fma (fpext u), (fpext v), (fneg z)))
11811     // FIXME: This turns two single-precision and one double-precision
11812     // operation into two double-precision operations, which might not be
11813     // interesting for all targets, especially GPUs.
11814     if (N0.getOpcode() == ISD::FP_EXTEND) {
11815       SDValue N00 = N0.getOperand(0);
11816       if (N00.getOpcode() == PreferredFusedOpcode) {
11817         SDValue N002 = N00.getOperand(2);
11818         if (isContractableFMUL(N002) &&
11819             TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N00.getValueType())) {
11820           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11821                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11822                                          N00.getOperand(0)),
11823                              DAG.getNode(ISD::FP_EXTEND, SL, VT,
11824                                          N00.getOperand(1)),
11825                              DAG.getNode(PreferredFusedOpcode, SL, VT,
11826                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11827                                                      N002.getOperand(0)),
11828                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
11829                                                      N002.getOperand(1)),
11830                                          DAG.getNode(ISD::FNEG, SL, VT,
11831                                                      N1), Flags), Flags);
11832         }
11833       }
11834     }
11835
11836     // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
11837     //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
11838     if (N1.getOpcode() == PreferredFusedOpcode &&
11839         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
11840       SDValue N120 = N1.getOperand(2).getOperand(0);
11841       if (isContractableFMUL(N120) &&
11842           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, N120.getValueType())) {
11843         SDValue N1200 = N120.getOperand(0);
11844         SDValue N1201 = N120.getOperand(1);
11845         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11846                            DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
11847                            N1.getOperand(1),
11848                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11849                                        DAG.getNode(ISD::FNEG, SL, VT,
11850                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11851                                                                VT, N1200)),
11852                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11853                                                    N1201),
11854                                        N0, Flags), Flags);
11855       }
11856     }
11857
11858     // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
11859     //   -> (fma (fneg (fpext y)), (fpext z),
11860     //           (fma (fneg (fpext u)), (fpext v), x))
11861     // FIXME: This turns two single-precision and one double-precision
11862     // operation into two double-precision operations, which might not be
11863     // interesting for all targets, especially GPUs.
11864     if (N1.getOpcode() == ISD::FP_EXTEND &&
11865         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
11866       SDValue CvtSrc = N1.getOperand(0);
11867       SDValue N100 = CvtSrc.getOperand(0);
11868       SDValue N101 = CvtSrc.getOperand(1);
11869       SDValue N102 = CvtSrc.getOperand(2);
11870       if (isContractableFMUL(N102) &&
11871           TLI.isFPExtFoldable(PreferredFusedOpcode, VT, CvtSrc.getValueType())) {
11872         SDValue N1020 = N102.getOperand(0);
11873         SDValue N1021 = N102.getOperand(1);
11874         return DAG.getNode(PreferredFusedOpcode, SL, VT,
11875                            DAG.getNode(ISD::FNEG, SL, VT,
11876                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11877                                                    N100)),
11878                            DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
11879                            DAG.getNode(PreferredFusedOpcode, SL, VT,
11880                                        DAG.getNode(ISD::FNEG, SL, VT,
11881                                                    DAG.getNode(ISD::FP_EXTEND, SL,
11882                                                                VT, N1020)),
11883                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
11884                                                    N1021),
11885                                        N0, Flags), Flags);
11886       }
11887     }
11888   }
11889
11890   return SDValue();
11891 }
11892
11893 /// Try to perform FMA combining on a given FMUL node based on the distributive
11894 /// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
11895 /// subtraction instead of addition).
11896 SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
11897   SDValue N0 = N->getOperand(0);
11898   SDValue N1 = N->getOperand(1);
11899   EVT VT = N->getValueType(0);
11900   SDLoc SL(N);
11901   const SDNodeFlags Flags = N->getFlags();
11902
11903   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
11904
11905   const TargetOptions &Options = DAG.getTarget().Options;
11906
11907   // The transforms below are incorrect when x == 0 and y == inf, because the
11908   // intermediate multiplication produces a nan.
11909   if (!Options.NoInfsFPMath)
11910     return SDValue();
11911
11912   // Floating-point multiply-add without intermediate rounding.
11913   bool HasFMA =
11914       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
11915       TLI.isFMAFasterThanFMulAndFAdd(VT) &&
11916       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
11917
11918   // Floating-point multiply-add with intermediate rounding. This can result
11919   // in a less precise result due to the changed rounding order.
11920   bool HasFMAD = Options.UnsafeFPMath &&
11921                  (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
11922
11923   // No valid opcode, do not combine.
11924   if (!HasFMAD && !HasFMA)
11925     return SDValue();
11926
11927   // Always prefer FMAD to FMA for precision.
11928   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
11929   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
11930
11931   // fold (fmul (fadd x0, +1.0), y) -> (fma x0, y, y)
11932   // fold (fmul (fadd x0, -1.0), y) -> (fma x0, y, (fneg y))
11933   auto FuseFADD = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11934     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
11935       if (auto *C = isConstOrConstSplatFP(X.getOperand(1), true)) {
11936         if (C->isExactlyValue(+1.0))
11937           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11938                              Y, Flags);
11939         if (C->isExactlyValue(-1.0))
11940           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11941                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11942       }
11943     }
11944     return SDValue();
11945   };
11946
11947   if (SDValue FMA = FuseFADD(N0, N1, Flags))
11948     return FMA;
11949   if (SDValue FMA = FuseFADD(N1, N0, Flags))
11950     return FMA;
11951
11952   // fold (fmul (fsub +1.0, x1), y) -> (fma (fneg x1), y, y)
11953   // fold (fmul (fsub -1.0, x1), y) -> (fma (fneg x1), y, (fneg y))
11954   // fold (fmul (fsub x0, +1.0), y) -> (fma x0, y, (fneg y))
11955   // fold (fmul (fsub x0, -1.0), y) -> (fma x0, y, y)
11956   auto FuseFSUB = [&](SDValue X, SDValue Y, const SDNodeFlags Flags) {
11957     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
11958       if (auto *C0 = isConstOrConstSplatFP(X.getOperand(0), true)) {
11959         if (C0->isExactlyValue(+1.0))
11960           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11961                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11962                              Y, Flags);
11963         if (C0->isExactlyValue(-1.0))
11964           return DAG.getNode(PreferredFusedOpcode, SL, VT,
11965                              DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
11966                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11967       }
11968       if (auto *C1 = isConstOrConstSplatFP(X.getOperand(1), true)) {
11969         if (C1->isExactlyValue(+1.0))
11970           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11971                              DAG.getNode(ISD::FNEG, SL, VT, Y), Flags);
11972         if (C1->isExactlyValue(-1.0))
11973           return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
11974                              Y, Flags);
11975       }
11976     }
11977     return SDValue();
11978   };
11979
11980   if (SDValue FMA = FuseFSUB(N0, N1, Flags))
11981     return FMA;
11982   if (SDValue FMA = FuseFSUB(N1, N0, Flags))
11983     return FMA;
11984
11985   return SDValue();
11986 }
11987
11988 SDValue DAGCombiner::visitFADD(SDNode *N) {
11989   SDValue N0 = N->getOperand(0);
11990   SDValue N1 = N->getOperand(1);
11991   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
11992   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
11993   EVT VT = N->getValueType(0);
11994   SDLoc DL(N);
11995   const TargetOptions &Options = DAG.getTarget().Options;
11996   const SDNodeFlags Flags = N->getFlags();
11997
11998   // fold vector ops
11999   if (VT.isVector())
12000     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12001       return FoldedVOp;
12002
12003   // fold (fadd c1, c2) -> c1 + c2
12004   if (N0CFP && N1CFP)
12005     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
12006
12007   // canonicalize constant to RHS
12008   if (N0CFP && !N1CFP)
12009     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
12010
12011   // N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
12012   ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
12013   if (N1C && N1C->isZero())
12014     if (N1C->isNegative() || Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())
12015       return N0;
12016
12017   if (SDValue NewSel = foldBinOpIntoSelect(N))
12018     return NewSel;
12019
12020   // fold (fadd A, (fneg B)) -> (fsub A, B)
12021   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12022       isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12023     return DAG.getNode(ISD::FSUB, DL, VT, N0,
12024                        GetNegatedExpression(N1, DAG, LegalOperations,
12025                                             ForCodeSize), Flags);
12026
12027   // fold (fadd (fneg A), B) -> (fsub B, A)
12028   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
12029       isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
12030     return DAG.getNode(ISD::FSUB, DL, VT, N1,
12031                        GetNegatedExpression(N0, DAG, LegalOperations,
12032                                             ForCodeSize), Flags);
12033
12034   auto isFMulNegTwo = [](SDValue FMul) {
12035     if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
12036       return false;
12037     auto *C = isConstOrConstSplatFP(FMul.getOperand(1), true);
12038     return C && C->isExactlyValue(-2.0);
12039   };
12040
12041   // fadd (fmul B, -2.0), A --> fsub A, (fadd B, B)
12042   if (isFMulNegTwo(N0)) {
12043     SDValue B = N0.getOperand(0);
12044     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12045     return DAG.getNode(ISD::FSUB, DL, VT, N1, Add, Flags);
12046   }
12047   // fadd A, (fmul B, -2.0) --> fsub A, (fadd B, B)
12048   if (isFMulNegTwo(N1)) {
12049     SDValue B = N1.getOperand(0);
12050     SDValue Add = DAG.getNode(ISD::FADD, DL, VT, B, B, Flags);
12051     return DAG.getNode(ISD::FSUB, DL, VT, N0, Add, Flags);
12052   }
12053
12054   // No FP constant should be created after legalization as Instruction
12055   // Selection pass has a hard time dealing with FP constants.
12056   bool AllowNewConst = (Level < AfterLegalizeDAG);
12057
12058   // If nnan is enabled, fold lots of things.
12059   if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
12060     // If allowed, fold (fadd (fneg x), x) -> 0.0
12061     if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
12062       return DAG.getConstantFP(0.0, DL, VT);
12063
12064     // If allowed, fold (fadd x, (fneg x)) -> 0.0
12065     if (N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
12066       return DAG.getConstantFP(0.0, DL, VT);
12067   }
12068
12069   // If 'unsafe math' or reassoc and nsz, fold lots of things.
12070   // TODO: break out portions of the transformations below for which Unsafe is
12071   //       considered and which do not require both nsz and reassoc
12072   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12073        (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
12074       AllowNewConst) {
12075     // fadd (fadd x, c1), c2 -> fadd x, c1 + c2
12076     if (N1CFP && N0.getOpcode() == ISD::FADD &&
12077         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12078       SDValue NewC = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1, Flags);
12079       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0), NewC, Flags);
12080     }
12081
12082     // We can fold chains of FADD's of the same value into multiplications.
12083     // This transform is not safe in general because we are reducing the number
12084     // of rounding steps.
12085     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
12086       if (N0.getOpcode() == ISD::FMUL) {
12087         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12088         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
12089
12090         // (fadd (fmul x, c), x) -> (fmul x, c+1)
12091         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
12092           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12093                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12094           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
12095         }
12096
12097         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
12098         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
12099             N1.getOperand(0) == N1.getOperand(1) &&
12100             N0.getOperand(0) == N1.getOperand(0)) {
12101           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
12102                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12103           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
12104         }
12105       }
12106
12107       if (N1.getOpcode() == ISD::FMUL) {
12108         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12109         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
12110
12111         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
12112         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
12113           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12114                                        DAG.getConstantFP(1.0, DL, VT), Flags);
12115           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
12116         }
12117
12118         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
12119         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
12120             N0.getOperand(0) == N0.getOperand(1) &&
12121             N1.getOperand(0) == N0.getOperand(0)) {
12122           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
12123                                        DAG.getConstantFP(2.0, DL, VT), Flags);
12124           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
12125         }
12126       }
12127
12128       if (N0.getOpcode() == ISD::FADD) {
12129         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
12130         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
12131         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
12132             (N0.getOperand(0) == N1)) {
12133           return DAG.getNode(ISD::FMUL, DL, VT,
12134                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
12135         }
12136       }
12137
12138       if (N1.getOpcode() == ISD::FADD) {
12139         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
12140         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
12141         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
12142             N1.getOperand(0) == N0) {
12143           return DAG.getNode(ISD::FMUL, DL, VT,
12144                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
12145         }
12146       }
12147
12148       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
12149       if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
12150           N0.getOperand(0) == N0.getOperand(1) &&
12151           N1.getOperand(0) == N1.getOperand(1) &&
12152           N0.getOperand(0) == N1.getOperand(0)) {
12153         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
12154                            DAG.getConstantFP(4.0, DL, VT), Flags);
12155       }
12156     }
12157   } // enable-unsafe-fp-math
12158
12159   // FADD -> FMA combines:
12160   if (SDValue Fused = visitFADDForFMACombine(N)) {
12161     AddToWorklist(Fused.getNode());
12162     return Fused;
12163   }
12164   return SDValue();
12165 }
12166
12167 SDValue DAGCombiner::visitFSUB(SDNode *N) {
12168   SDValue N0 = N->getOperand(0);
12169   SDValue N1 = N->getOperand(1);
12170   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12171   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12172   EVT VT = N->getValueType(0);
12173   SDLoc DL(N);
12174   const TargetOptions &Options = DAG.getTarget().Options;
12175   const SDNodeFlags Flags = N->getFlags();
12176
12177   // fold vector ops
12178   if (VT.isVector())
12179     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12180       return FoldedVOp;
12181
12182   // fold (fsub c1, c2) -> c1-c2
12183   if (N0CFP && N1CFP)
12184     return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
12185
12186   if (SDValue NewSel = foldBinOpIntoSelect(N))
12187     return NewSel;
12188
12189   // (fsub A, 0) -> A
12190   if (N1CFP && N1CFP->isZero()) {
12191     if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
12192         Flags.hasNoSignedZeros()) {
12193       return N0;
12194     }
12195   }
12196
12197   if (N0 == N1) {
12198     // (fsub x, x) -> 0.0
12199     if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
12200       return DAG.getConstantFP(0.0f, DL, VT);
12201   }
12202
12203   // (fsub -0.0, N1) -> -N1
12204   // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
12205   //       FSUB does not specify the sign bit of a NaN. Also note that for
12206   //       the same reason, the inverse transform is not safe, unless fast math
12207   //       flags are in play.
12208   if (N0CFP && N0CFP->isZero()) {
12209     if (N0CFP->isNegative() ||
12210         (Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
12211       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12212         return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12213       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12214         return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
12215     }
12216   }
12217
12218   if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
12219       (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
12220       && N1.getOpcode() == ISD::FADD) {
12221     // X - (X + Y) -> -Y
12222     if (N0 == N1->getOperand(0))
12223       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
12224     // X - (Y + X) -> -Y
12225     if (N0 == N1->getOperand(1))
12226       return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(0), Flags);
12227   }
12228
12229   // fold (fsub A, (fneg B)) -> (fadd A, B)
12230   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
12231     return DAG.getNode(ISD::FADD, DL, VT, N0,
12232                        GetNegatedExpression(N1, DAG, LegalOperations,
12233                                             ForCodeSize), Flags);
12234
12235   // FSUB -> FMA combines:
12236   if (SDValue Fused = visitFSUBForFMACombine(N)) {
12237     AddToWorklist(Fused.getNode());
12238     return Fused;
12239   }
12240
12241   return SDValue();
12242 }
12243
12244 /// Return true if both inputs are at least as cheap in negated form and at
12245 /// least one input is strictly cheaper in negated form.
12246 bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
12247   const TargetOptions &Options = DAG.getTarget().Options;
12248   if (char LHSNeg = isNegatibleForFree(X, LegalOperations, TLI, &Options,
12249                                    ForCodeSize))
12250     if (char RHSNeg = isNegatibleForFree(Y, LegalOperations, TLI, &Options,
12251                                          ForCodeSize))
12252       // Both negated operands are at least as cheap as their counterparts.
12253       // Check to see if at least one is cheaper negated.
12254       if (LHSNeg == 2 || RHSNeg == 2)
12255         return true;
12256
12257   return false;
12258 }
12259
12260 SDValue DAGCombiner::visitFMUL(SDNode *N) {
        // Try each FMUL simplification below in the order written and return the
        // first replacement found. An empty SDValue means that no fold applied.
12261   SDValue N0 = N->getOperand(0);
12262   SDValue N1 = N->getOperand(1);
12263   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, true);
12264   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1, true);
12265   EVT VT = N->getValueType(0);
12266   SDLoc DL(N);
12267   const TargetOptions &Options = DAG.getTarget().Options;
12268   const SDNodeFlags Flags = N->getFlags();
12269
12270   // fold vector ops
12271   if (VT.isVector()) {
12272     // This just handles C1 * C2 for vectors. Other vector folds are below.
12273     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12274       return FoldedVOp;
12275   }
12276
12277   // fold (fmul c1, c2) -> c1*c2
12278   if (N0CFP && N1CFP)
12279     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
12280
12281   // canonicalize constant to RHS
12282   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12283      !isConstantFPBuildVectorOrConstantFP(N1))
12284     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
12285
12286   if (SDValue NewSel = foldBinOpIntoSelect(N))
12287     return NewSel;
12288
        // Requires both no-NaNs and no-signed-zeros, taken either from the global
        // target options or from this node's flags.
12289   if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
12290       (Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
12291     // fold (fmul A, 0) -> 0
12292     if (N1CFP && N1CFP->isZero())
12293       return N1;
12294   }
12295
12296   if (Options.UnsafeFPMath || Flags.hasAllowReassociation()) {
12297     // fmul (fmul X, C1), C2 -> fmul X, C1 * C2
12298     if (isConstantFPBuildVectorOrConstantFP(N1) &&
12299         N0.getOpcode() == ISD::FMUL) {
12300       SDValue N00 = N0.getOperand(0);
12301       SDValue N01 = N0.getOperand(1);
12302       // Avoid an infinite loop by making sure that N00 is not a constant
12303       // (the inner multiply has not been constant folded yet).
12304       if (isConstantFPBuildVectorOrConstantFP(N01) &&
12305           !isConstantFPBuildVectorOrConstantFP(N00)) {
12306         SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
12307         return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
12308       }
12309     }
12310
12311     // Match a special-case: we convert X * 2.0 into fadd.
12312     // fmul (fadd X, X), C -> fmul X, 2.0 * C
          // NOTE(review): the condition below does not test that N1 is a
          // constant, even though the pattern above writes it as C.
12313     if (N0.getOpcode() == ISD::FADD && N0.hasOneUse() &&
12314         N0.getOperand(0) == N0.getOperand(1)) {
12315       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
12316       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
12317       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
12318     }
12319   }
12320
12321   // fold (fmul X, 2.0) -> (fadd X, X)
12322   if (N1CFP && N1CFP->isExactlyValue(+2.0))
12323     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
12324
12325   // fold (fmul X, -1.0) -> (fneg X)
12326   if (N1CFP && N1CFP->isExactlyValue(-1.0))
12327     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12328       return DAG.getNode(ISD::FNEG, DL, VT, N0);
12329
12330   // -N0 * -N1 --> N0 * N1
12331   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12332     SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12333     SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12334     return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
12335   }
12336
12337   // fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
12338   // fold (fmul X, (select (fcmp X > 0.0), 1.0, -1.0)) -> (fabs X)
12339   if (Flags.hasNoNaNs() && Flags.hasNoSignedZeros() &&
12340       (N0.getOpcode() == ISD::SELECT || N1.getOpcode() == ISD::SELECT) &&
12341       TLI.isOperationLegal(ISD::FABS, VT)) {
          // Start by assuming N0 is the select and swap if it is not, so that
          // Select names the SELECT operand and X names the other operand.
12342     SDValue Select = N0, X = N1;
12343     if (Select.getOpcode() != ISD::SELECT)
12344       std::swap(Select, X);
12345
12346     SDValue Cond = Select.getOperand(0);
12347     auto TrueOpnd  = dyn_cast<ConstantFPSDNode>(Select.getOperand(1));
12348     auto FalseOpnd = dyn_cast<ConstantFPSDNode>(Select.getOperand(2));
12349
          // Both select arms must be FP constants, and the condition must be a
          // SETCC comparing X itself against the constant 0.0.
12350     if (TrueOpnd && FalseOpnd &&
12351         Cond.getOpcode() == ISD::SETCC && Cond.getOperand(0) == X &&
12352         isa<ConstantFPSDNode>(Cond.getOperand(1)) &&
12353         cast<ConstantFPSDNode>(Cond.getOperand(1))->isExactlyValue(0.0)) {
12354       ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
12355       switch (CC) {
12356       default: break;
            // "Less than" predicates: swap the select arms so that the checks
            // below, written for "greater than", apply unchanged.
12357       case ISD::SETOLT:
12358       case ISD::SETULT:
12359       case ISD::SETOLE:
12360       case ISD::SETULE:
12361       case ISD::SETLT:
12362       case ISD::SETLE:
12363         std::swap(TrueOpnd, FalseOpnd);
12364         LLVM_FALLTHROUGH;
12365       case ISD::SETOGT:
12366       case ISD::SETUGT:
12367       case ISD::SETOGE:
12368       case ISD::SETUGE:
12369       case ISD::SETGT:
12370       case ISD::SETGE:
12371         if (TrueOpnd->isExactlyValue(-1.0) && FalseOpnd->isExactlyValue(1.0) &&
12372             TLI.isOperationLegal(ISD::FNEG, VT))
12373           return DAG.getNode(ISD::FNEG, DL, VT,
12374                    DAG.getNode(ISD::FABS, DL, VT, X));
12375         if (TrueOpnd->isExactlyValue(1.0) && FalseOpnd->isExactlyValue(-1.0))
12376           return DAG.getNode(ISD::FABS, DL, VT, X);
12377
12378         break;
12379       }
12380     }
12381   }
12382
12383   // FMUL -> FMA combines:
12384   if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
12385     AddToWorklist(Fused.getNode());
12386     return Fused;
12387   }
12388
12389   return SDValue();
12390 }
12391
12392 SDValue DAGCombiner::visitFMA(SDNode *N) {
        // Try each FMA simplification below in the order written and return the
        // first replacement found. An empty SDValue means that no fold applied.
12393   SDValue N0 = N->getOperand(0);
12394   SDValue N1 = N->getOperand(1);
12395   SDValue N2 = N->getOperand(2);
        // Only operands that are themselves ConstantFPSDNode nodes are picked up
        // by these two pointers (plain dyn_cast).
12396   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12397   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12398   EVT VT = N->getValueType(0);
12399   SDLoc DL(N);
12400   const TargetOptions &Options = DAG.getTarget().Options;
12401
12402   // FMA nodes have flags that propagate to the created nodes.
12403   const SDNodeFlags Flags = N->getFlags();
        // Local shorthand: the "unsafe" folds below are enabled either by the
        // global option or when isContractable(N) holds for this node.
12404   bool UnsafeFPMath = Options.UnsafeFPMath || isContractable(N);
12405
12406   // Constant fold FMA.
12407   if (isa<ConstantFPSDNode>(N0) &&
12408       isa<ConstantFPSDNode>(N1) &&
12409       isa<ConstantFPSDNode>(N2)) {
12410     return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
12411   }
12412
12413   // (-N0 * -N1) + N2 --> (N0 * N1) + N2
12414   if (isCheaperToUseNegatedFPOps(N0, N1)) {
12415     SDValue NegN0 = GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
12416     SDValue NegN1 = GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
12417     return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
12418   }
12419
        // (fma 0, x, y) -> y and (fma x, 0, y) -> y
12420   if (UnsafeFPMath) {
12421     if (N0CFP && N0CFP->isZero())
12422       return N2;
12423     if (N1CFP && N1CFP->isZero())
12424       return N2;
12425   }
        // (fma 1, x, y) -> (fadd x, y) and (fma x, 1, y) -> (fadd x, y)
12426   // TODO: The FMA node should have flags that propagate to these nodes.
12427   if (N0CFP && N0CFP->isExactlyValue(1.0))
12428     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
12429   if (N1CFP && N1CFP->isExactlyValue(1.0))
12430     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
12431
12432   // Canonicalize (fma c, x, y) -> (fma x, c, y)
12433   if (isConstantFPBuildVectorOrConstantFP(N0) &&
12434      !isConstantFPBuildVectorOrConstantFP(N1))
12435     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
12436
12437   if (UnsafeFPMath) {
12438     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
12439     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
12440         isConstantFPBuildVectorOrConstantFP(N1) &&
12441         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
12442       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12443                          DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
12444                                      Flags), Flags);
12445     }
12446
12447     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
12448     if (N0.getOpcode() == ISD::FMUL &&
12449         isConstantFPBuildVectorOrConstantFP(N1) &&
12450         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
12451       return DAG.getNode(ISD::FMA, DL, VT,
12452                          N0.getOperand(0),
12453                          DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
12454                                      Flags),
12455                          N2);
12456     }
12457   }
12458
12459   // (fma x, 1, y) -> (fadd x, y)
12460   // (fma x, -1, y) -> (fadd (fneg x), y)
12461   if (N1CFP) {
          // NOTE(review): N1 == 1.0 already returned an FADD earlier in this
          // function, so this first check appears to be unreachable.
12462     if (N1CFP->isExactlyValue(1.0))
12463       // TODO: The FMA node should have flags that propagate to this node.
12464       return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
12465
12466     if (N1CFP->isExactlyValue(-1.0) &&
12467         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
12468       SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
12469       AddToWorklist(RHSNeg.getNode());
12470       // TODO: The FMA node should have flags that propagate to this node.
12471       return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
12472     }
12473
12474     // fma (fneg x), K, y -> fma x, -K, y
12475     if (N0.getOpcode() == ISD::FNEG &&
12476         (TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12477          (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
12478                                               ForCodeSize)))) {
12479       return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
12480                          DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
12481     }
12482   }
12483
12484   if (UnsafeFPMath) {
12485     // (fma x, c, x) -> (fmul x, (c+1))
12486     if (N1CFP && N0 == N2) {
12487       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12488                          DAG.getNode(ISD::FADD, DL, VT, N1,
12489                                      DAG.getConstantFP(1.0, DL, VT), Flags),
12490                          Flags);
12491     }
12492
12493     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
12494     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
12495       return DAG.getNode(ISD::FMUL, DL, VT, N0,
12496                          DAG.getNode(ISD::FADD, DL, VT, N1,
12497                                      DAG.getConstantFP(-1.0, DL, VT), Flags),
12498                          Flags);
12499     }
12500   }
12501
12502   return SDValue();
12503 }
12504
12505 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
12506 // reciprocal.
12507 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
12508 // Notice that this is not always beneficial. One reason is that different
12509 // targets may have different costs for FDIV and FMUL, so sometimes the cost
12510 // of two FDIVs may be lower than the cost of one FDIV and two FMULs. Another
12511 // reason is that the critical path grows from "one FDIV" to "one FDIV + one FMUL".
      //
      // Returns SDValue(N, 0) once the FDIV users of the divisor have been
      // rewritten with CombineTo, or an empty SDValue if the transform is not
      // applied.
12512 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
12513   // TODO: Limit this transform based on optsize/minsize - it always creates at
12514   //       least 1 extra instruction. But the perf win may be substantial enough
12515   //       that only minsize should restrict this.
12516   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
12517   const SDNodeFlags Flags = N->getFlags();
12518   if (!UnsafeMath && !Flags.hasAllowReciprocal())
12519     return SDValue();
12520
12521   // Skip if current node is a reciprocal/fneg-reciprocal.
12522   SDValue N0 = N->getOperand(0);
12523   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
12524   if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
12525     return SDValue();
12526
12527   // Exit early if the target does not want this transform or if there can't
12528   // possibly be enough uses of the divisor to make the transform worthwhile.
12529   SDValue N1 = N->getOperand(1);
12530   unsigned MinUses = TLI.combineRepeatedFPDivisors();
12531
12532   // For splat vectors, scale the number of uses by the splat factor. If we can
12533   // convert the division into a scalar op, that will likely be much faster.
12534   unsigned NumElts = 1;
12535   EVT VT = N->getValueType(0);
12536   if (VT.isVector() && DAG.isSplatValue(N1))
12537     NumElts = VT.getVectorNumElements();
12538
        // A MinUses of zero disables the transform outright.
12539   if (!MinUses || (N1->use_size() * NumElts) < MinUses)
12540     return SDValue();
12541
12542   // Find all FDIV users of the same divisor.
12543   // Use a set because duplicates may be present in the user list.
12544   SetVector<SDNode *> Users;
12545   for (auto *U : N1->uses()) {
12546     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
12547       // This division is eligible for optimization only if global unsafe math
12548       // is enabled or if this division allows reciprocal formation.
12549       if (UnsafeMath || U->getFlags().hasAllowReciprocal())
12550         Users.insert(U);
12551     }
12552   }
12553
12554   // Now that we have the actual number of divisor uses, make sure it meets
12555   // the minimum threshold specified by the target.
12556   if ((Users.size() * NumElts) < MinUses)
12557     return SDValue();
12558
12559   SDLoc DL(N);
12560   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
12561   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
12562
12563   // Dividend / Divisor -> Dividend * Reciprocal
12564   for (auto *U : Users) {
12565     SDValue Dividend = U->getOperand(0);
12566     if (Dividend != FPOne) {
12567       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
12568                                     Reciprocal, Flags);
12569       CombineTo(U, NewNode);
12570     } else if (U != Reciprocal.getNode()) {
12571       // In the absence of fast-math-flags, this user node is always the
12572       // same node as Reciprocal, but with FMF they may be different nodes.
12573       CombineTo(U, Reciprocal);
12574     }
12575   }
12576   return SDValue(N, 0);  // N was replaced.
12577 }
12578
12579 SDValue DAGCombiner::visitFDIV(SDNode *N) {
        // Try each FDIV simplification below in the order written and return the
        // first replacement found. An empty SDValue means that no fold applied.
12580   SDValue N0 = N->getOperand(0);
12581   SDValue N1 = N->getOperand(1);
12582   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12583   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12584   EVT VT = N->getValueType(0);
12585   SDLoc DL(N);
12586   const TargetOptions &Options = DAG.getTarget().Options;
12587   SDNodeFlags Flags = N->getFlags();
12588
12589   // fold vector ops
12590   if (VT.isVector())
12591     if (SDValue FoldedVOp = SimplifyVBinOp(N))
12592       return FoldedVOp;
12593
12594   // fold (fdiv c1, c2) -> c1/c2
12595   if (N0CFP && N1CFP)
12596     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
12597
12598   if (SDValue NewSel = foldBinOpIntoSelect(N))
12599     return NewSel;
12600
12601   if (SDValue V = combineRepeatedFPDivisors(N))
12602     return V;
12603
        // Everything in this block replaces the division by a multiplication with
        // a reciprocal, so it needs unsafe math or the allow-reciprocal flag.
12604   if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
12605     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
12606     if (N1CFP) {
12607       // Compute the reciprocal 1.0 / c2.
12608       const APFloat &N1APF = N1CFP->getValueAPF();
12609       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
12610       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
12611       // Only do the transform if the reciprocal is a legal fp immediate that
12612       // isn't too nasty (e.g. NaN, denormal, ...).
12613       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
12614           (!LegalOperations ||
12615            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
12616            // backend)... we should handle this gracefully after Legalize.
12617            // TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
12618            TLI.isOperationLegal(ISD::ConstantFP, VT) ||
12619            TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
12620         return DAG.getNode(ISD::FMUL, DL, VT, N0,
12621                            DAG.getConstantFP(Recip, DL, VT), Flags);
12622     }
12623
12624     // If this FDIV is part of a reciprocal square root, it may be folded
12625     // into a target-specific square root estimate instruction.
12626     if (N1.getOpcode() == ISD::FSQRT) {
12627       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
12628         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12629     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
12630                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
            // x / fp_extend(sqrt(z)): estimate rsqrt(z), then re-apply the extend.
12631       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12632                                           Flags)) {
12633         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
12634         AddToWorklist(RV.getNode());
12635         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12636       }
12637     } else if (N1.getOpcode() == ISD::FP_ROUND &&
12638                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
            // x / fp_round(sqrt(z)): estimate rsqrt(z), then re-apply the round
            // with the original second operand of the FP_ROUND.
12639       if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
12640                                           Flags)) {
12641         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
12642         AddToWorklist(RV.getNode());
12643         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12644       }
12645     } else if (N1.getOpcode() == ISD::FMUL) {
12646       // Look through an FMUL. Even though this won't remove the FDIV directly,
12647       // it's still worthwhile to get rid of the FSQRT if possible.
12648       SDValue SqrtOp;
12649       SDValue OtherOp;
12650       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
12651         SqrtOp = N1.getOperand(0);
12652         OtherOp = N1.getOperand(1);
12653       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
12654         SqrtOp = N1.getOperand(1);
12655         OtherOp = N1.getOperand(0);
12656       }
12657       if (SqrtOp.getNode()) {
12658         // We found a FSQRT, so try to make this fold:
12659         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
12660         if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
12661           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
12662           AddToWorklist(RV.getNode());
12663           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12664         }
12665       }
12666     }
12667
12668     // Fold into a reciprocal estimate and multiply instead of a real divide.
12669     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
12670       AddToWorklist(RV.getNode());
12671       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
12672     }
12673   }
12674
12675   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
12676   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
12677                                        ForCodeSize)) {
12678     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
12679                                          ForCodeSize)) {
12680       // Both can be negated for free, check to see if at least one is cheaper
12681       // negated.
12682       if (LHSNeg == 2 || RHSNeg == 2)
12683         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
12684                            GetNegatedExpression(N0, DAG, LegalOperations,
12685                                                 ForCodeSize),
12686                            GetNegatedExpression(N1, DAG, LegalOperations,
12687                                                 ForCodeSize),
12688                            Flags);
12689     }
12690   }
12691
12692   return SDValue();
12693 }
12694
12695 SDValue DAGCombiner::visitFREM(SDNode *N) {
        // Handles two cases: both operands are ConstantFPSDNode nodes, or
        // foldBinOpIntoSelect() produces a replacement. Otherwise an empty
        // SDValue is returned.
12696   SDValue N0 = N->getOperand(0);
12697   SDValue N1 = N->getOperand(1);
12698   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
12699   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
12700   EVT VT = N->getValueType(0);
12701
12702   // fold (frem c1, c2) -> fmod(c1,c2)
12703   if (N0CFP && N1CFP)
12704     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1, N->getFlags());
12705
12706   if (SDValue NewSel = foldBinOpIntoSelect(N))
12707     return NewSel;
12708
12709   return SDValue();
12710 }
12711
12712 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
        // Replace FSQRT with whatever buildSqrtEstimate() returns. This is
        // attempted only when approximation is permitted (global UnsafeFPMath or
        // the node's approximate-functions flag) and the target does not report
        // the square root as cheap through isFsqrtCheap().
12713   SDNodeFlags Flags = N->getFlags();
12714   if (!DAG.getTarget().Options.UnsafeFPMath &&
12715       !Flags.hasApproximateFuncs())
12716     return SDValue();
12717
12718   SDValue N0 = N->getOperand(0);
12719   if (TLI.isFsqrtCheap(N0, DAG))
12720     return SDValue();
12721
12722   // FSQRT nodes have flags that propagate to the created nodes.
12723   return buildSqrtEstimate(N0, Flags);
12724 }
12725
12726 /// copysign(x, fp_extend(y)) -> copysign(x, y)
12727 /// copysign(x, fp_round(y)) -> copysign(x, y)
      ///
      /// Return true if operand 1 of \p N is an FP_EXTEND or FP_ROUND whose
      /// conversion may be dropped: either it does not change the type, or its
      /// source type is not f128. Return false for any other operand 1.
12728 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
12729   SDValue N1 = N->getOperand(1);
12730   if ((N1.getOpcode() == ISD::FP_EXTEND ||
12731        N1.getOpcode() == ISD::FP_ROUND)) {
12732     // Do not optimize out type conversion of f128 type yet.
12733     // For some targets like x86_64, configuration is changed to keep one f128
12734     // value in one SSE register, but instruction selection cannot handle
12735     // FCOPYSIGN on SSE registers yet.
12736     EVT N1VT = N1->getValueType(0);
12737     EVT N1Op0VT = N1->getOperand(0).getValueType();
12738     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
12739   }
12740   return false;
12741 }
12742
12743 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
        // Try each FCOPYSIGN simplification below in the order written and return
        // the first replacement found. An empty SDValue means no fold applied.
12744   SDValue N0 = N->getOperand(0);
12745   SDValue N1 = N->getOperand(1);
        // Note: unlike the visitors that keep ConstantFPSDNode pointers, these
        // two are plain booleans.
12746   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
12747   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
12748   EVT VT = N->getValueType(0);
12749
12750   if (N0CFP && N1CFP) // Constant fold
12751     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
12752
12753   if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N->getOperand(1))) {
12754     const APFloat &V = N1C->getValueAPF();
12755     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
12756     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
12757     if (!V.isNegative()) {
12758       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
12759         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12760     } else {
12761       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
12762         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
12763                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
12764     }
12765   }
12766
12767   // copysign(fabs(x), y) -> copysign(x, y)
12768   // copysign(fneg(x), y) -> copysign(x, y)
12769   // copysign(copysign(x,z), y) -> copysign(x, y)
12770   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
12771       N0.getOpcode() == ISD::FCOPYSIGN)
12772     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
12773
12774   // copysign(x, abs(y)) -> abs(x)
12775   if (N1.getOpcode() == ISD::FABS)
12776     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
12777
12778   // copysign(x, copysign(y,z)) -> copysign(x, z)
12779   if (N1.getOpcode() == ISD::FCOPYSIGN)
12780     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
12781
12782   // copysign(x, fp_extend(y)) -> copysign(x, y)
12783   // copysign(x, fp_round(y)) -> copysign(x, y)
12784   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
12785     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
12786
12787   return SDValue();
12788 }
12789
12790 SDValue DAGCombiner::visitFPOW(SDNode *N) {
        // Rewrite FPOW with a constant (or constant-splat) exponent of 1/3, 0.25
        // or 0.75 into cube-root / square-root nodes when the node's flags allow
        // it. Any other exponent, or a failed precondition, returns an empty
        // SDValue.
12791   ConstantFPSDNode *ExponentC = isConstOrConstSplatFP(N->getOperand(1));
12792   if (!ExponentC)
12793     return SDValue();
12794
12795   // Try to convert x ** (1/3) into cube root.
12796   // TODO: Handle the various flavors of long double.
12797   // TODO: Since we're approximating, we don't need an exact 1/3 exponent.
12798   //       Some range near 1/3 should be fine.
12799   EVT VT = N->getValueType(0);
12800   if ((VT == MVT::f32 && ExponentC->getValueAPF().isExactlyValue(1.0f/3.0f)) ||
12801       (VT == MVT::f64 && ExponentC->getValueAPF().isExactlyValue(1.0/3.0))) {
12802     // pow(-0.0, 1/3) = +0.0; cbrt(-0.0) = -0.0.
12803     // pow(-inf, 1/3) = +inf; cbrt(-inf) = -inf.
12804     // pow(-val, 1/3) =  nan; cbrt(-val) = -num.
12805     // For regular numbers, rounding may cause the results to differ.
12806     // Therefore, we require { nsz ninf nnan afn } for this transform.
12807     // TODO: We could select out the special cases if we don't have nsz/ninf.
12808     SDNodeFlags Flags = N->getFlags();
12809     if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() || !Flags.hasNoNaNs() ||
12810         !Flags.hasApproximateFuncs())
12811       return SDValue();
12812
12813     // Do not create a cbrt() libcall if the target does not have it, and do not
12814     // turn a pow that has lowering support into a cbrt() libcall.
12815     if (!DAG.getLibInfo().has(LibFunc_cbrt) ||
12816         (!DAG.getTargetLoweringInfo().isOperationExpand(ISD::FPOW, VT) &&
12817          DAG.getTargetLoweringInfo().isOperationExpand(ISD::FCBRT, VT)))
12818       return SDValue();
12819
12820     return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
12821   }
12822
12823   // Try to convert x ** (1/4) and x ** (3/4) into square roots.
12824   // x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
12825   // TODO: This could be extended (using a target hook) to handle smaller
12826   // power-of-2 fractional exponents.
12827   bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
12828   bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
12829   if (ExponentIs025 || ExponentIs075) {
12830     // pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
12831     // pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) =  NaN.
12832     // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
12833     // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) =  NaN.
12834     // For regular numbers, rounding may cause the results to differ.
12835     // Therefore, we require { nsz ninf afn } for this transform.
12836     // TODO: We could select out the special cases if we don't have nsz/ninf.
12837     SDNodeFlags Flags = N->getFlags();
12838
12839     // We only need no signed zeros for the 0.25 case.
12840     if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
12841         !Flags.hasApproximateFuncs())
12842       return SDValue();
12843
12844     // Don't double the number of libcalls. We are trying to inline fast code.
12845     if (!DAG.getTargetLoweringInfo().isOperationLegalOrCustom(ISD::FSQRT, VT))
12846       return SDValue();
12847
12848     // Assume that libcalls are the smallest code.
12849     // TODO: This restriction should probably be lifted for vectors.
12850     if (DAG.getMachineFunction().getFunction().hasOptSize())
12851       return SDValue();
12852
12853     // pow(X, 0.25) --> sqrt(sqrt(X))
12854     SDLoc DL(N);
12855     SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
12856     SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
12857     if (ExponentIs025)
12858       return SqrtSqrt;
12859     // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
12860     return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
12861   }
12862
12863   return SDValue();
12864 }
12865
12866 static SDValue foldFPToIntToFP(SDNode *N, SelectionDAG &DAG,
12867                                const TargetLowering &TLI) {
        // Fold [us]itofp (fpto[us]i X) --> ftrunc X when the signedness of the two
        // conversions matches and X already has the result type. Returns the
        // FTRUNC node, or an empty SDValue if a precondition fails or the pattern
        // does not match.
        //
12868   // This optimization is guarded by a function attribute because it may produce
12869   // unexpected results. I.e., programs may be relying on the platform-specific
12870   // undefined behavior when the float-to-int conversion overflows.
        // The fold is skipped when "strict-float-cast-overflow" is the string
        // "false".
12871   const Function &F = DAG.getMachineFunction().getFunction();
12872   Attribute StrictOverflow = F.getFnAttribute("strict-float-cast-overflow");
12873   if (StrictOverflow.getValueAsString().equals("false"))
12874     return SDValue();
12875
12876   // We only do this if the target has legal ftrunc. Otherwise, we'd likely be
12877   // replacing casts with a libcall. We also must be allowed to ignore -0.0
12878   // because FTRUNC will return -0.0 for (-1.0, -0.0), but using integer
12879   // conversions would return +0.0.
12880   // FIXME: We should be able to use node-level FMF here.
12881   // TODO: If strict math, should we use FABS (+ range check for signed cast)?
12882   EVT VT = N->getValueType(0);
12883   if (!TLI.isOperationLegal(ISD::FTRUNC, VT) ||
12884       !DAG.getTarget().Options.NoSignedZerosFPMath)
12885     return SDValue();
12886
12887   // fptosi/fptoui round towards zero, so converting from FP to integer and
12888   // back is the same as an 'ftrunc': [us]itofp (fpto[us]i X) --> ftrunc X
12889   SDValue N0 = N->getOperand(0);
12890   if (N->getOpcode() == ISD::SINT_TO_FP && N0.getOpcode() == ISD::FP_TO_SINT &&
12891       N0.getOperand(0).getValueType() == VT)
12892     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12893
12894   if (N->getOpcode() == ISD::UINT_TO_FP && N0.getOpcode() == ISD::FP_TO_UINT &&
12895       N0.getOperand(0).getValueType() == VT)
12896     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0.getOperand(0));
12897
12898   return SDValue();
12899 }
12900
12901 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
        // Try each SINT_TO_FP simplification below in the order written and return
        // the first replacement found. An empty SDValue means no fold applied.
12902   SDValue N0 = N->getOperand(0);
12903   EVT VT = N->getValueType(0);
12904   EVT OpVT = N0.getValueType();
12905
12906   // [us]itofp(undef) = 0, because the result value is bounded.
12907   if (N0.isUndef())
12908     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12909
12910   // fold (sint_to_fp c1) -> c1fp
12911   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12912       // ...but only if the target supports immediate floating-point values
12913       (!LegalOperations ||
12914        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12915     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12916
12917   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
12918   // but UINT_TO_FP is legal on this target, try to convert.
12919   if (!hasOperation(ISD::SINT_TO_FP, OpVT) &&
12920       hasOperation(ISD::UINT_TO_FP, OpVT)) {
12921     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
12922     if (DAG.SignBitIsZero(N0))
12923       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12924   }
12925
12926   // The next optimizations are desirable only if SELECT_CC can be lowered.
12927   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12928     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0, cc)
          // Only applied when the setcc result is i1: a set i1 read as a signed
          // integer is -1, hence the -1.0 constant.
12929     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
12930         !VT.isVector() &&
12931         (!LegalOperations ||
12932          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12933       SDLoc DL(N);
12934       SDValue Ops[] =
12935         { N0.getOperand(0), N0.getOperand(1),
12936           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12937           N0.getOperand(2) };
12938       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12939     }
12940
12941     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
12942     //      (select_cc x, y, 1.0, 0.0, cc)
12943     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
12944         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
12945         (!LegalOperations ||
12946          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12947       SDLoc DL(N);
12948       SDValue Ops[] =
12949         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
12950           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12951           N0.getOperand(0).getOperand(2) };
12952       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12953     }
12954   }
12955
12956   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
12957     return FTrunc;
12958
12959   return SDValue();
12960 }
12961
12962 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
        // Try each UINT_TO_FP simplification below in the order written and return
        // the first replacement found. An empty SDValue means no fold applied.
12963   SDValue N0 = N->getOperand(0);
12964   EVT VT = N->getValueType(0);
12965   EVT OpVT = N0.getValueType();
12966
12967   // [us]itofp(undef) = 0, because the result value is bounded.
12968   if (N0.isUndef())
12969     return DAG.getConstantFP(0.0, SDLoc(N), VT);
12970
12971   // fold (uint_to_fp c1) -> c1fp
12972   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
12973       // ...but only if the target supports immediate floating-point values
12974       (!LegalOperations ||
12975        TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT)))
12976     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
12977
12978   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
12979   // but SINT_TO_FP is legal on this target, try to convert.
12980   if (!hasOperation(ISD::UINT_TO_FP, OpVT) &&
12981       hasOperation(ISD::SINT_TO_FP, OpVT)) {
12982     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
12983     if (DAG.SignBitIsZero(N0))
12984       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
12985   }
12986
12987   // The next optimizations are desirable only if SELECT_CC can be lowered.
12988   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
12989     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, 1.0, 0.0, cc)
12990     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
12991         (!LegalOperations ||
12992          TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT))) {
12993       SDLoc DL(N);
12994       SDValue Ops[] =
12995         { N0.getOperand(0), N0.getOperand(1),
12996           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
12997           N0.getOperand(2) };
12998       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
12999     }
13000   }
13001
13002   if (SDValue FTrunc = foldFPToIntToFP(N, DAG, TLI))
13003     return FTrunc;
13004
13005   return SDValue();
13006 }
13007
13008 // Fold (fp_to_{s/u}int ({s/u}int_to_fp x)) -> zext x, sext x, trunc x, or x
      // N is the fp_to_{s/u}int node. Returns an empty SDValue when N's operand
      // is not an int-to-fp conversion, or when the intermediate FP type does not
      // have enough precision for the round trip to be exact.
13009 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
13010   SDValue N0 = N->getOperand(0);
13011   EVT VT = N->getValueType(0);
13012
13013   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
13014     return SDValue();
13015
        // Src is the original integer value feeding the int-to-fp conversion.
13016   SDValue Src = N0.getOperand(0);
13017   EVT SrcVT = Src.getValueType();
13018   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
13019   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
13020
13021   // We can safely assume the conversion won't overflow the output range,
13022   // because (for example) (uint8_t)18293.f is undefined behavior.
13023
13024   // Since we can assume the conversion won't overflow, our decision as to
13025   // whether the input will fit in the float should depend on the minimum
13026   // of the input range and output range.
13027
13028   // This means this is also safe for a signed input and unsigned output, since
13029   // a negative input would lead to undefined behavior.
        // Count magnitude bits only: a signed type loses one bit to the sign
        // (the bool is subtracted as 0 or 1).
13030   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
13031   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
13032   unsigned ActualSize = std::min(InputSize, OutputSize);
        // Semantics of the intermediate floating-point type (the type of N0).
13033   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
13034
13035   // We can only fold away the float conversion if the input range can be
13036   // represented exactly in the float range.
13037   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
          // Result wider than source: extend. Sign-extend only when both
          // conversions are signed; every other combination zero-extends.
13038     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
13039       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
13040                                                        : ISD::ZERO_EXTEND;
13041       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
13042     }
          // Result narrower than source: truncate.
13043     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
13044       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
          // Same scalar width: reuse Src, bitcast to the result type.
13045     return DAG.getBitcast(VT, Src);
13046   }
13047   return SDValue();
13048 }
13049
13050 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
        // Combine hook for ISD::FP_TO_SINT. Returns the replacement value, or
        // an empty SDValue() when nothing applies.
13051   SDValue N0 = N->getOperand(0);
13052   EVT VT = N->getValueType(0);
13053
13054   // fold (fp_to_sint undef) -> undef
13055   if (N0.isUndef())
13056     return DAG.getUNDEF(VT);
13057
13058   // fold (fp_to_sint c1fp) -> c1
        // The node is rebuilt through getNode(), which presumably constant-folds
        // it -- TODO confirm.
13059   if (isConstantFPBuildVectorOrConstantFP(N0))
13060     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
13061
        // Otherwise try to cancel an int->fp->int round trip.
13062   return FoldIntToFPToInt(N, DAG);
13063 }
13064
13065 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
        // Combine hook for ISD::FP_TO_UINT; mirrors visitFP_TO_SINT with the
        // unsigned opcode. Returns an empty SDValue() when nothing applies.
13066   SDValue N0 = N->getOperand(0);
13067   EVT VT = N->getValueType(0);
13068
13069   // fold (fp_to_uint undef) -> undef
13070   if (N0.isUndef())
13071     return DAG.getUNDEF(VT);
13072
13073   // fold (fp_to_uint c1fp) -> c1
        // The node is rebuilt through getNode(), which presumably constant-folds
        // it -- TODO confirm.
13074   if (isConstantFPBuildVectorOrConstantFP(N0))
13075     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
13076
        // Otherwise try to cancel an int->fp->int round trip.
13077   return FoldIntToFPToInt(N, DAG);
13078 }
13079
13080 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
        // Combine hook for ISD::FP_ROUND. Operand 0 is the value being rounded;
        // operand 1 (N1) is a constant flag that the code below compares against
        // 1 and treats as "this rounding is a value-preserving truncation".
        // Returns the replacement value, or an empty SDValue() when no fold
        // applies.
13081   SDValue N0 = N->getOperand(0);
13082   SDValue N1 = N->getOperand(1);
13083   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13084   EVT VT = N->getValueType(0);
13085
13086   // fold (fp_round c1fp) -> c1fp
        // Scalar constants only (N0CFP comes from a dyn_cast to
        // ConstantFPSDNode); the node is rebuilt through getNode().
13087   if (N0CFP)
13088     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
13089
13090   // fold (fp_round (fp_extend x)) -> x
        // Only when rounding goes back to exactly x's original type.
13091   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
13092     return N0.getOperand(0);
13093
13094   // fold (fp_round (fp_round x)) -> (fp_round x)
13095   if (N0.getOpcode() == ISD::FP_ROUND) {
13096     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
13097     const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
13098
13099     // Skip this folding if it results in an fp_round from f80 to f16.
13100     //
13101     // f80 to f16 always generates an expensive (and as yet, unimplemented)
13102     // libcall to __truncxfhf2 instead of selecting native f16 conversion
13103     // instructions from f32 or f64.  Moreover, the first (value-preserving)
13104     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
13105     // x86.
13106     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
13107       return SDValue();
13108
13109     // If the first fp_round isn't a value preserving truncation, it might
13110     // introduce a tie in the second fp_round, that wouldn't occur in the
13111     // single-step fp_round we want to fold to.
13112     // In other words, double rounding isn't the same as rounding.
13113     // Also, this is a value preserving truncation iff both fp_round's are.
13114     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
13115       SDLoc DL(N);
            // The merged node's flag is 1 only if both original flags were 1.
13116       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
13117                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
13118     }
13119   }
13120
13121   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
        // Only when the copysign has no other user.
13122   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
13123     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
13124                               N0.getOperand(0), N1);
          // Queue the new inner fp_round so it gets combined as well.
13125     AddToWorklist(Tmp.getNode());
13126     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
13127                        Tmp, N0.getOperand(1));
13128   }
13129
13130   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13131     return NewVSel;
13132
13133   return SDValue();
13134 }
13135
13136 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
        // Combine hook for ISD::FP_ROUND_INREG. Operand 0 is the value; operand
        // 1 is a VTSDNode carrying the type to round to. Only the constant case
        // is folded here; otherwise an empty SDValue() is returned.
13137   SDValue N0 = N->getOperand(0);
13138   EVT VT = N->getValueType(0);
        // NOTE: this local variable is itself named `EVT` and shadows the type
        // name for the rest of the function.
13139   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
13140   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
13141
13142   // fold (fp_round_inreg c1fp) -> c1fp
        // Materialize the constant in the narrower type (only if that type is
        // legal) and extend it back to the node's result type.
13143   if (N0CFP && isTypeLegal(EVT)) {
13144     SDLoc DL(N);
13145     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
13146     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
13147   }
13148
13149   return SDValue();
13150 }
13151
13152 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
        // Combine hook for ISD::FP_EXTEND. Returns the replacement value,
        // SDValue(N, 0) when N was already replaced in place via CombineTo, or
        // an empty SDValue() when no fold applies.
13153   SDValue N0 = N->getOperand(0);
13154   EVT VT = N->getValueType(0);
13155
13156   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
13157   if (N->hasOneUse() &&
13158       N->use_begin()->getOpcode() == ISD::FP_ROUND)
13159     return SDValue();
13160
13161   // fold (fp_extend c1fp) -> c1fp
13162   if (isConstantFPBuildVectorOrConstantFP(N0))
13163     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
13164
13165   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
        // Only when FP16_TO_FP producing VT is strictly Legal (not Custom).
13166   if (N0.getOpcode() == ISD::FP16_TO_FP &&
13167       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
13168     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
13169
13170   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
13171   // value of X.
13172   if (N0.getOpcode() == ISD::FP_ROUND
13173       && N0.getConstantOperandVal(1) == 1) {
13174     SDValue In = N0.getOperand(0);
          // Three cases depending on how VT compares with X's type: identical
          // (return X), narrower (one fp_round keeping the same flag operand),
          // or wider (one fp_extend).
13175     if (In.getValueType() == VT) return In;
13176     if (VT.bitsLT(In.getValueType()))
13177       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
13178                          In, N0.getOperand(1));
13179     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
13180   }
13181
13182   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
        // Requires a normal load with a single use and a legal EXTLOAD from the
        // loaded type to VT.
13183   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
13184        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
13185     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
13186     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
13187                                      LN0->getChain(),
13188                                      LN0->getBasePtr(), N0.getValueType(),
13189                                      LN0->getMemOperand());
          // N itself becomes the extending load.
13190     CombineTo(N, ExtLoad);
          // The old load's two results are replaced by a flag-1 fp_round of the
          // extload and by ExtLoad.getValue(1) (presumably the chain result --
          // TODO confirm).
13191     CombineTo(N0.getNode(),
13192               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
13193                           N0.getValueType(), ExtLoad,
13194                           DAG.getIntPtrConstant(1, SDLoc(N0))),
13195               ExtLoad.getValue(1));
13196     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
13197   }
13198
13199   if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
13200     return NewVSel;
13201
13202   return SDValue();
13203 }
13204
13205 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
        // Combine hook for ISD::FCEIL; only the constant-operand case is handled.
13206   SDValue N0 = N->getOperand(0);
13207   EVT VT = N->getValueType(0);
13208
13209   // fold (fceil c1) -> fceil(c1)
        // The node is rebuilt through getNode(), which presumably constant-folds
        // it -- TODO confirm.
13210   if (isConstantFPBuildVectorOrConstantFP(N0))
13211     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
13212
13213   return SDValue();
13214 }
13215
13216 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
        // Combine hook for ISD::FTRUNC. Returns the replacement value, or an
        // empty SDValue() when no fold applies.
13217   SDValue N0 = N->getOperand(0);
13218   EVT VT = N->getValueType(0);
13219
13220   // fold (ftrunc c1) -> ftrunc(c1)
13221   if (isConstantFPBuildVectorOrConstantFP(N0))
13222     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
13223
13224   // fold ftrunc (known rounded int x) -> x
13225   // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is
13226   // likely to be generated to extract integer from a rounded floating value.
13227   switch (N0.getOpcode()) {
13228   default: break;
        // Each of these operand opcodes is treated as already producing an
        // integral value, so the outer ftrunc is dropped and N0 is returned.
13229   case ISD::FRINT:
13230   case ISD::FTRUNC:
13231   case ISD::FNEARBYINT:
13232   case ISD::FFLOOR:
13233   case ISD::FCEIL:
13234     return N0;
13235   }
13236
13237   return SDValue();
13238 }
13239
13240 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
        // Combine hook for ISD::FFLOOR; only the constant-operand case is handled.
13241   SDValue N0 = N->getOperand(0);
13242   EVT VT = N->getValueType(0);
13243
13244   // fold (ffloor c1) -> ffloor(c1)
        // The node is rebuilt through getNode(), which presumably constant-folds
        // it -- TODO confirm.
13245   if (isConstantFPBuildVectorOrConstantFP(N0))
13246     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
13247
13248   return SDValue();
13249 }
13250
13251 // FIXME: FNEG and FABS have a lot in common; refactor.
      // Combine hook for ISD::FNEG. Returns the replacement value, or an empty
      // SDValue() when no fold applies.
13252 SDValue DAGCombiner::visitFNEG(SDNode *N) {
13253   SDValue N0 = N->getOperand(0);
13254   EVT VT = N->getValueType(0);
13255
13256   // Constant fold FNEG.
13257   if (isConstantFPBuildVectorOrConstantFP(N0))
13258     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
13259
        // If the operand can be negated at no extra cost, return its negated
        // form instead of keeping an explicit fneg.
13260   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
13261                          &DAG.getTarget().Options, ForCodeSize))
13262     return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
13263
13264   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
13265   // constant pool values.
13266   if (!TLI.isFNegFree(VT) &&
13267       N0.getOpcode() == ISD::BITCAST &&
13268       N0.getNode()->hasOneUse()) {
13269     SDValue Int = N0.getOperand(0);
13270     EVT IntVT = Int.getValueType();
          // Only a scalar integer source is handled; the bitcast result (the FP
          // side) may still be a vector, hence the two mask shapes below.
13271     if (IntVT.isInteger() && !IntVT.isVector()) {
13272       APInt SignMask;
13273       if (N0.getValueType().isVector()) {
13274         // For a vector, get a mask such as 0x80... per scalar element
13275         // and splat it.
13276         SignMask = APInt::getSignMask(N0.getScalarValueSizeInBits());
13277         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13278       } else {
13279         // For a scalar, just generate 0x80...
13280         SignMask = APInt::getSignMask(IntVT.getSizeInBits());
13281       }
13282       SDLoc DL0(N0);
            // Flip the sign bit(s) with an integer XOR, then cast back to VT.
13283       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
13284                         DAG.getConstant(SignMask, DL0, IntVT));
13285       AddToWorklist(Int.getNode());
13286       return DAG.getBitcast(VT, Int);
13287     }
13288   }
13289
13290   // (fneg (fmul x, c)) -> (fmul x, -c)
        // The constant is looked for in operand 1 of the fmul only. The fold is
        // applied when the fmul has a single use or fneg is not free for VT.
13291   if (N0.getOpcode() == ISD::FMUL &&
13292       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
13293     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
13294     if (CFP1) {
            // CVal is the negated constant; it is used only to ask the target
            // whether that immediate is legal.
13295       APFloat CVal = CFP1->getValueAPF();
13296       CVal.changeSign();
            // Only at or after the AfterLegalizeDAG level, and only if the
            // negated immediate (or ConstantFP in general) is legal for VT.
13297       if (Level >= AfterLegalizeDAG &&
13298           (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
13299            TLI.isOperationLegal(ISD::ConstantFP, VT)))
              // The fmul's flags are carried over to the new node.
13300         return DAG.getNode(
13301             ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
13302             DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1)),
13303             N0->getFlags());
13304     }
13305   }
13306
13307   return SDValue();
13308 }
13309
13310 static SDValue visitFMinMax(SelectionDAG &DAG, SDNode *N,
13311                             APFloat (*Op)(const APFloat &, const APFloat &)) {
        // Shared combine logic for the four FP min/max visitors. N is the
        // min/max node; Op is the APFloat function used to fold two constant
        // operands. Returns an empty SDValue() when no fold applies.
13312   SDValue N0 = N->getOperand(0);
13313   SDValue N1 = N->getOperand(1);
13314   EVT VT = N->getValueType(0);
13315   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
13316   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
13317
        // Both operands are constants (or constant splats): evaluate Op now.
13318   if (N0CFP && N1CFP) {
13319     const APFloat &C0 = N0CFP->getValueAPF();
13320     const APFloat &C1 = N1CFP->getValueAPF();
13321     return DAG.getConstantFP(Op(C0, C1), SDLoc(N), VT);
13322   }
13323
13324   // Canonicalize to constant on RHS.
13325   if (isConstantFPBuildVectorOrConstantFP(N0) &&
13326       !isConstantFPBuildVectorOrConstantFP(N1))
13327     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
13328
13329   return SDValue();
13330 }
13331
13332 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
        // Delegates to visitFMinMax with `minnum` as the constant-folding function.
13333   return visitFMinMax(DAG, N, minnum);
13334 }
13335
13336 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
        // Delegates to visitFMinMax with `maxnum` as the constant-folding function.
13337   return visitFMinMax(DAG, N, maxnum);
13338 }
13339
13340 SDValue DAGCombiner::visitFMINIMUM(SDNode *N) {
        // Delegates to visitFMinMax with `minimum` as the constant-folding function.
13341   return visitFMinMax(DAG, N, minimum);
13342 }
13343
13344 SDValue DAGCombiner::visitFMAXIMUM(SDNode *N) {
        // Delegates to visitFMinMax with `maximum` as the constant-folding function.
13345   return visitFMinMax(DAG, N, maximum);
13346 }
13347
13348 SDValue DAGCombiner::visitFABS(SDNode *N) {
        // Combine hook for ISD::FABS. Returns the replacement value, or an empty
        // SDValue() when no fold applies.
13349   SDValue N0 = N->getOperand(0);
13350   EVT VT = N->getValueType(0);
13351
13352   // fold (fabs c1) -> fabs(c1)
13353   if (isConstantFPBuildVectorOrConstantFP(N0))
13354     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
13355
13356   // fold (fabs (fabs x)) -> (fabs x)
        // The inner fabs (operand 0 of N) is returned as-is.
13357   if (N0.getOpcode() == ISD::FABS)
13358     return N->getOperand(0);
13359
13360   // fold (fabs (fneg x)) -> (fabs x)
13361   // fold (fabs (fcopysign x, y)) -> (fabs x)
13362   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
13363     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
13364
13365   // fabs(bitcast(x)) -> bitcast(x & ~sign) to avoid constant pool loads.
13366   if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.hasOneUse()) {
13367     SDValue Int = N0.getOperand(0);
13368     EVT IntVT = Int.getValueType();
          // Only a scalar integer source is handled; the bitcast result (the FP
          // side) may still be a vector, hence the two mask shapes below.
13369     if (IntVT.isInteger() && !IntVT.isVector()) {
            // Despite its name, SignMask holds the inverted sign mask here,
            // i.e. every bit set except the sign bit(s).
13370       APInt SignMask;
13371       if (N0.getValueType().isVector()) {
13372         // For a vector, get a mask such as 0x7f... per scalar element
13373         // and splat it.
13374         SignMask = ~APInt::getSignMask(N0.getScalarValueSizeInBits());
13375         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
13376       } else {
13377         // For a scalar, just generate 0x7f...
13378         SignMask = ~APInt::getSignMask(IntVT.getSizeInBits());
13379       }
13380       SDLoc DL(N0);
            // Clear the sign bit(s) with an integer AND, then cast back.
13381       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
13382                         DAG.getConstant(SignMask, DL, IntVT));
13383       AddToWorklist(Int.getNode());
13384       return DAG.getBitcast(N->getValueType(0), Int);
13385     }
13386   }
13387
13388   return SDValue();
13389 }
13390
13391 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
        // Combine hook for ISD::BRCOND. Operands as read here: 0 = chain,
        // 1 = condition (N1), 2 = third operand (N2), forwarded unchanged as the
        // last operand of the rebuilt branch. Returns the replacement branch
        // node, or an empty SDValue() when nothing applies.
13392   SDValue Chain = N->getOperand(0);
13393   SDValue N1 = N->getOperand(1);
13394   SDValue N2 = N->getOperand(2);
13395
13396   // If N is a constant we could fold this into a fallthrough or unconditional
13397   // branch. However that doesn't happen very often in normal code, because
13398   // Instcombine/SimplifyCFG should have handled the available opportunities.
13399   // If we did this folding here, it would be necessary to update the
13400   // MachineBasicBlock CFG, which is awkward.
13401
13402   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
13403   // on the target.
        // Legality is queried for the type of the values being compared.
13404   if (N1.getOpcode() == ISD::SETCC &&
13405       TLI.isOperationLegalOrCustom(ISD::BR_CC,
13406                                    N1.getOperand(0).getValueType())) {
          // BR_CC operands: chain, condition code, lhs, rhs, then N2.
13407     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13408                        Chain, N1.getOperand(2),
13409                        N1.getOperand(0), N1.getOperand(1), N2);
13410   }
13411
        // If this branch is the condition's only user, try to rewrite the
        // condition into a setcc and branch on that instead.
13412   if (N1.hasOneUse()) {
13413     if (SDValue NewN1 = rebuildSetCC(N1))
13414       return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, NewN1, N2);
13415   }
13416
13417   return SDValue();
13418 }
13419
13420 SDValue DAGCombiner::rebuildSetCC(SDValue N) {
        // Try to rewrite a branch condition N into an equivalent SETCC. Two
        // shapes are recognized: a single-bit test written as srl(and(x, 2^k), k)
        // (optionally behind a truncate), and an XOR. Returns the new condition,
        // or an empty SDValue() when N matches neither shape.
13421   if (N.getOpcode() == ISD::SRL ||
13422       (N.getOpcode() == ISD::TRUNCATE &&
13423        (N.getOperand(0).hasOneUse() &&
13424         N.getOperand(0).getOpcode() == ISD::SRL))) {
13425     // Look past the truncate.
13426     if (N.getOpcode() == ISD::TRUNCATE)
13427       N = N.getOperand(0);
13428
13429     // Match this pattern so that we can generate simpler code:
13430     //
13431     //   %a = ...
13432     //   %b = and i32 %a, 2
13433     //   %c = srl i32 %b, 1
13434     //   brcond i32 %c ...
13435     //
13436     // into
13437     //
13438     //   %a = ...
13439     //   %b = and i32 %a, 2
13440     //   %c = setcc ne %b, 0
13441     //   brcond %c ...
13442     //
13443     // This applies only when the AND constant value has one bit set and the
13444     // SRL constant is equal to the log2 of the AND constant. The back-end is
13445     // smart enough to convert the result into a TEST/JMP sequence.
13446     SDValue Op0 = N.getOperand(0);
13447     SDValue Op1 = N.getOperand(1);
13448
13449     if (Op0.getOpcode() == ISD::AND && Op1.getOpcode() == ISD::Constant) {
13450       SDValue AndOp1 = Op0.getOperand(1);
13451
13452       if (AndOp1.getOpcode() == ISD::Constant) {
13453         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
13454
13455         if (AndConst.isPowerOf2() &&
13456             cast<ConstantSDNode>(Op1)->getAPIntValue() == AndConst.logBase2()) {
13457           SDLoc DL(N);
                // Compare the masked value itself against zero (SETNE).
13458           return DAG.getSetCC(DL, getSetCCResultType(Op0.getValueType()),
13459                               Op0, DAG.getConstant(0, DL, Op0.getValueType()),
13460                               ISD::SETNE);
13461         }
13462       }
13463     }
13464   }
13465
13466   // Transform br(xor(x, y)) -> br(x != y)
13467   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
13468   if (N.getOpcode() == ISD::XOR) {
13469     // Because we may call this on a speculatively constructed
13470     // SimplifiedSetCC Node, we need to simplify this node first.
13471     // Ideally this should be folded into SimplifySetCC and not
13472     // here. For now, grab a handle to N so we don't lose it from
13473     // replacements internal to the visit.
13474     HandleSDNode XORHandle(N);
13475     while (N.getOpcode() == ISD::XOR) {
13476       SDValue Tmp = visitXOR(N.getNode());
13477       // No simplification done.
13478       if (!Tmp.getNode())
13479         break;
13480       // Returning N signals an in-visit replacement that may have
13481       // invalidated N. Grab the value from the handle.
13482       if (Tmp.getNode() == N.getNode())
13483         N = XORHandle.getValue();
13484       else // Node simplified. Try simplifying again.
13485         N = Tmp;
13486     }
13487
          // The XOR simplified into something else: hand that back as the new
          // condition.
13488     if (N.getOpcode() != ISD::XOR)
13489       return N;
13490
13491     SDNode *TheXor = N.getNode();
13492
13493     SDValue Op0 = TheXor->getOperand(0);
13494     SDValue Op1 = TheXor->getOperand(1);
13495
          // Only rewrite when neither XOR operand is already a SETCC.
13496     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
13497       bool Equal = false;
            // NOTE(review): this requires Op0 to be both the constant one and an
            // XOR node. If isOneConstant() only matches constant nodes, the
            // condition can never hold, so Equal stays false and the
            // xor(xor(x,y), 1) form is never turned into SETEQ -- confirm. Note
            // also that Op0/Op1 are not re-read from the inner XOR when it fires.
13498       if (isOneConstant(Op0) && Op0.hasOneUse() &&
13499           Op0.getOpcode() == ISD::XOR) {
13500         TheXor = Op0.getNode();
13501         Equal = true;
13502       }
13503
            // Before type legalization the XOR's own type is used for the setcc;
            // afterwards the target's setcc result type for that type is used.
13504       EVT SetCCVT = N.getValueType();
13505       if (LegalTypes)
13506         SetCCVT = getSetCCResultType(SetCCVT);
13507       // Replace the uses of XOR with SETCC
13508       return DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1,
13509                           Equal ? ISD::SETEQ : ISD::SETNE);
13510     }
13511   }
13512
13513   return SDValue();
13514 }
13515
13516 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
13517 //
      // Combine hook for ISD::BR_CC: simplifies the embedded comparison and, if
      // the result is still a SETCC, rebuilds the branch around it. Returns an
      // empty SDValue() otherwise.
13518 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
13519   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
13520   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
13521
13522   // If N is a constant we could fold this into a fallthrough or unconditional
13523   // branch. However that doesn't happen very often in normal code, because
13524   // Instcombine/SimplifyCFG should have handled the available opportunities.
13525   // If we did this folding here, it would be necessary to update the
13526   // MachineBasicBlock CFG, which is awkward.
13527
13528   // Use SimplifySetCC to simplify SETCC's.
        // NOTE(review): the trailing `false` is presumably the "fold booleans"
        // flag of SimplifySetCC -- confirm against its declaration.
13529   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
13530                                CondLHS, CondRHS, CC->get(), SDLoc(N),
13531                                false);
        // Whatever came back is queued for further combining, even if it is not
        // used to rebuild the branch below.
13532   if (Simp.getNode()) AddToWorklist(Simp.getNode());
13533
13534   // fold to a simpler setcc
        // The chain (operand 0) and destination (operand 4) are kept; condition
        // code and compare operands are taken from the simplified SETCC.
13535   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
13536     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
13537                        N->getOperand(0), Simp.getOperand(2),
13538                        Simp.getOperand(0), Simp.getOperand(1),
13539                        N->getOperand(4));
13540
13541   return SDValue();
13542 }
13543
13544 /// Return true if 'Use' is a load or a store that uses N as its base pointer
13545 /// and that N may be folded in the load / store addressing mode.
      ///
      /// N must be an ISD::ADD or ISD::SUB node for this to return true; indexed
      /// loads/stores and any other kind of user yield false.
13546 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
13547                                     SelectionDAG &DAG,
13548                                     const TargetLowering &TLI) {
        // Memory type and address space of the access; both are assigned on
        // every path that does not return early below.
13549   EVT VT;
13550   unsigned AS;
13551
13552   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
13553     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
13554       return false;
13555     VT = LD->getMemoryVT();
13556     AS = LD->getAddressSpace();
13557   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
13558     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
13559       return false;
13560     VT = ST->getMemoryVT();
13561     AS = ST->getAddressSpace();
13562   } else
13563     return false;
13564
        // Describe N as an addressing mode: a base register plus either an
        // immediate offset (operand 1 is a constant) or a second register
        // with scale 1.
13565   TargetLowering::AddrMode AM;
13566   if (N->getOpcode() == ISD::ADD) {
13567     AM.HasBaseReg = true;
13568     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13569     if (Offset)
13570       // [reg +/- imm]
13571       AM.BaseOffs = Offset->getSExtValue();
13572     else
13573       // [reg +/- reg]
13574       AM.Scale = 1;
13575   } else if (N->getOpcode() == ISD::SUB) {
13576     AM.HasBaseReg = true;
13577     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
13578     if (Offset)
13579       // [reg +/- imm]
            // A subtracted constant becomes a negative immediate offset.
13580       AM.BaseOffs = -Offset->getSExtValue();
13581     else
13582       // [reg +/- reg]
            // Note: the register case uses Scale = 1 for SUB as well as ADD.
13583       AM.Scale = 1;
13584   } else
13585     return false;
13586
        // Let the target decide whether that mode is legal for this memory type
        // and address space.
13587   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
13588                                    VT.getTypeForEVT(*DAG.getContext()), AS);
13589 }
13590
13591 /// Try turning a load/store into a pre-indexed load/store when the base
13592 /// pointer is an add or subtract and it has other uses besides the load/store.
13593 /// After the transformation, the new indexed load/store has effectively folded
13594 /// the add/subtract in and all of its other uses are redirected to the
13595 /// new load/store.
13596 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
13597   if (Level < AfterLegalizeDAG)
13598     return false;
13599
13600   bool isLoad = true;
13601   SDValue Ptr;
13602   EVT VT;
13603   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13604     if (LD->isIndexed())
13605       return false;
13606     VT = LD->getMemoryVT();
13607     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
13608         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
13609       return false;
13610     Ptr = LD->getBasePtr();
13611   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13612     if (ST->isIndexed())
13613       return false;
13614     VT = ST->getMemoryVT();
13615     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
13616         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
13617       return false;
13618     Ptr = ST->getBasePtr();
13619     isLoad = false;
13620   } else {
13621     return false;
13622   }
13623
13624   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
13625   // out.  There is no reason to make this a preinc/predec.
13626   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
13627       Ptr.getNode()->hasOneUse())
13628     return false;
13629
13630   // Ask the target to do addressing mode selection.
13631   SDValue BasePtr;
13632   SDValue Offset;
13633   ISD::MemIndexedMode AM = ISD::UNINDEXED;
13634   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
13635     return false;
13636
13637   // Backends without true r+i pre-indexed forms may need to pass a
13638   // constant base with a variable offset so that constant coercion
13639   // will work with the patterns in canonical form.
13640   bool Swapped = false;
13641   if (isa<ConstantSDNode>(BasePtr)) {
13642     std::swap(BasePtr, Offset);
13643     Swapped = true;
13644   }
13645
13646   // Don't create an indexed load / store with zero offset.
13647   if (isNullConstant(Offset))
13648     return false;
13649
13650   // Try turning it into a pre-indexed load / store except when:
13651   // 1) The new base ptr is a frame index.
13652   // 2) If N is a store and the new base ptr is either the same as or is a
13653   //    predecessor of the value being stored.
13654   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
13655   //    that would create a cycle.
13656   // 4) All uses are load / store ops that use it as old base ptr.
13657
13658   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
13659   // (plus the implicit offset) to a register to preinc anyway.
13660   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13661     return false;
13662
13663   // Check #2.
13664   if (!isLoad) {
13665     SDValue Val = cast<StoreSDNode>(N)->getValue();
13666
13667     // Would require a copy.
13668     if (Val == BasePtr)
13669       return false;
13670
13671     // Would create a cycle.
13672     if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
13673       return false;
13674   }
13675
13676   // Caches for hasPredecessorHelper.
13677   SmallPtrSet<const SDNode *, 32> Visited;
13678   SmallVector<const SDNode *, 16> Worklist;
13679   Worklist.push_back(N);
13680
13681   // If the offset is a constant, there may be other adds of constants that
13682   // can be folded with this one. We should do this to avoid having to keep
13683   // a copy of the original base pointer.
13684   SmallVector<SDNode *, 16> OtherUses;
13685   if (isa<ConstantSDNode>(Offset))
13686     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
13687                               UE = BasePtr.getNode()->use_end();
13688          UI != UE; ++UI) {
13689       SDUse &Use = UI.getUse();
13690       // Skip the use that is Ptr and uses of other results from BasePtr's
13691       // node (important for nodes that return multiple results).
13692       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
13693         continue;
13694
13695       if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
13696         continue;
13697
13698       if (Use.getUser()->getOpcode() != ISD::ADD &&
13699           Use.getUser()->getOpcode() != ISD::SUB) {
13700         OtherUses.clear();
13701         break;
13702       }
13703
13704       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
13705       if (!isa<ConstantSDNode>(Op1)) {
13706         OtherUses.clear();
13707         break;
13708       }
13709
13710       // FIXME: In some cases, we can be smarter about this.
13711       if (Op1.getValueType() != Offset.getValueType()) {
13712         OtherUses.clear();
13713         break;
13714       }
13715
13716       OtherUses.push_back(Use.getUser());
13717     }
13718
13719   if (Swapped)
13720     std::swap(BasePtr, Offset);
13721
13722   // Now check for #3 and #4.
13723   bool RealUse = false;
13724
13725   for (SDNode *Use : Ptr.getNode()->uses()) {
13726     if (Use == N)
13727       continue;
13728     if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
13729       return false;
13730
13731     // If Ptr may be folded in addressing mode of other use, then it's
13732     // not profitable to do this transformation.
13733     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
13734       RealUse = true;
13735   }
13736
13737   if (!RealUse)
13738     return false;
13739
13740   SDValue Result;
13741   if (isLoad)
13742     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13743                                 BasePtr, Offset, AM);
13744   else
13745     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13746                                  BasePtr, Offset, AM);
13747   ++PreIndexedNodes;
13748   ++NodesCombined;
13749   LLVM_DEBUG(dbgs() << "\nReplacing.4 "; N->dump(&DAG); dbgs() << "\nWith: ";
13750              Result.getNode()->dump(&DAG); dbgs() << '\n');
13751   WorklistRemover DeadNodes(*this);
13752   if (isLoad) {
13753     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13754     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13755   } else {
13756     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13757   }
13758
13759   // Finally, since the node is now dead, remove it from the graph.
13760   deleteAndRecombine(N);
13761
13762   if (Swapped)
13763     std::swap(BasePtr, Offset);
13764
13765   // Replace other uses of BasePtr that can be updated to use Ptr
13766   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
13767     unsigned OffsetIdx = 1;
13768     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
13769       OffsetIdx = 0;
13770     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
13771            BasePtr.getNode() && "Expected BasePtr operand");
13772
13773     // We need to replace ptr0 in the following expression:
13774     //   x0 * offset0 + y0 * ptr0 = t0
13775     // knowing that
13776     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
13777     //
13778     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
13779     // indexed load/store and the expression that needs to be re-written.
13780     //
13781     // Therefore, we have:
13782     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
13783
13784     ConstantSDNode *CN =
13785       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
13786     int X0, X1, Y0, Y1;
13787     const APInt &Offset0 = CN->getAPIntValue();
13788     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
13789
13790     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
13791     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
13792     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
13793     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
13794
13795     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
13796
13797     APInt CNV = Offset0;
13798     if (X0 < 0) CNV = -CNV;
13799     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
13800     else CNV = CNV - Offset1;
13801
13802     SDLoc DL(OtherUses[i]);
13803
13804     // We can now generate the new expression.
13805     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
13806     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
13807
13808     SDValue NewUse = DAG.getNode(Opcode,
13809                                  DL,
13810                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
13811     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
13812     deleteAndRecombine(OtherUses[i]);
13813   }
13814
13815   // Replace the uses of Ptr with uses of the updated base value.
13816   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
13817   deleteAndRecombine(Ptr.getNode());
13818   AddToWorklist(Result.getNode());
13819
13820   return true;
13821 }
13822
13823 /// Try to combine a load/store with a add/sub of the base pointer node into a
13824 /// post-indexed load/store. The transformation folded the add/subtract into the
13825 /// new indexed load/store effectively and all of its uses are redirected to the
13826 /// new load/store.
13827 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
13828   if (Level < AfterLegalizeDAG)
13829     return false;
13830
13831   bool isLoad = true;
13832   SDValue Ptr;
13833   EVT VT;
13834   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
13835     if (LD->isIndexed())
13836       return false;
13837     VT = LD->getMemoryVT();
13838     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
13839         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
13840       return false;
13841     Ptr = LD->getBasePtr();
13842   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
13843     if (ST->isIndexed())
13844       return false;
13845     VT = ST->getMemoryVT();
13846     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
13847         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
13848       return false;
13849     Ptr = ST->getBasePtr();
13850     isLoad = false;
13851   } else {
13852     return false;
13853   }
13854
13855   if (Ptr.getNode()->hasOneUse())
13856     return false;
13857
13858   for (SDNode *Op : Ptr.getNode()->uses()) {
13859     if (Op == N ||
13860         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
13861       continue;
13862
13863     SDValue BasePtr;
13864     SDValue Offset;
13865     ISD::MemIndexedMode AM = ISD::UNINDEXED;
13866     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
13867       // Don't create a indexed load / store with zero offset.
13868       if (isNullConstant(Offset))
13869         continue;
13870
13871       // Try turning it into a post-indexed load / store except when
13872       // 1) All uses are load / store ops that use it as base ptr (and
13873       //    it may be folded as addressing mmode).
13874       // 2) Op must be independent of N, i.e. Op is neither a predecessor
13875       //    nor a successor of N. Otherwise, if Op is folded that would
13876       //    create a cycle.
13877
13878       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
13879         continue;
13880
13881       // Check for #1.
13882       bool TryNext = false;
13883       for (SDNode *Use : BasePtr.getNode()->uses()) {
13884         if (Use == Ptr.getNode())
13885           continue;
13886
13887         // If all the uses are load / store addresses, then don't do the
13888         // transformation.
13889         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
13890           bool RealUse = false;
13891           for (SDNode *UseUse : Use->uses()) {
13892             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
13893               RealUse = true;
13894           }
13895
13896           if (!RealUse) {
13897             TryNext = true;
13898             break;
13899           }
13900         }
13901       }
13902
13903       if (TryNext)
13904         continue;
13905
13906       // Check for #2.
13907       SmallPtrSet<const SDNode *, 32> Visited;
13908       SmallVector<const SDNode *, 8> Worklist;
13909       // Ptr is predecessor to both N and Op.
13910       Visited.insert(Ptr.getNode());
13911       Worklist.push_back(N);
13912       Worklist.push_back(Op);
13913       if (!SDNode::hasPredecessorHelper(N, Visited, Worklist) &&
13914           !SDNode::hasPredecessorHelper(Op, Visited, Worklist)) {
13915         SDValue Result = isLoad
13916           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
13917                                BasePtr, Offset, AM)
13918           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
13919                                 BasePtr, Offset, AM);
13920         ++PostIndexedNodes;
13921         ++NodesCombined;
13922         LLVM_DEBUG(dbgs() << "\nReplacing.5 "; N->dump(&DAG);
13923                    dbgs() << "\nWith: "; Result.getNode()->dump(&DAG);
13924                    dbgs() << '\n');
13925         WorklistRemover DeadNodes(*this);
13926         if (isLoad) {
13927           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
13928           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
13929         } else {
13930           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
13931         }
13932
13933         // Finally, since the node is now dead, remove it from the graph.
13934         deleteAndRecombine(N);
13935
13936         // Replace the uses of Use with uses of the updated base value.
13937         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
13938                                       Result.getValue(isLoad ? 1 : 0));
13939         deleteAndRecombine(Op);
13940         return true;
13941       }
13942     }
13943   }
13944
13945   return false;
13946 }
13947
13948 /// Return the base-pointer arithmetic from an indexed \p LD.
13949 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
13950   ISD::MemIndexedMode AM = LD->getAddressingMode();
13951   assert(AM != ISD::UNINDEXED);
13952   SDValue BP = LD->getOperand(1);
13953   SDValue Inc = LD->getOperand(2);
13954
13955   // Some backends use TargetConstants for load offsets, but don't expect
13956   // TargetConstants in general ADD nodes. We can convert these constants into
13957   // regular Constants (if the constant is not opaque).
13958   assert((Inc.getOpcode() != ISD::TargetConstant ||
13959           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
13960          "Cannot split out indexing using opaque target constants");
13961   if (Inc.getOpcode() == ISD::TargetConstant) {
13962     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
13963     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
13964                           ConstInc->getValueType(0));
13965   }
13966
13967   unsigned Opc =
13968       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
13969   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
13970 }
13971
13972 static inline int numVectorEltsOrZero(EVT T) {
13973   return T.isVector() ? T.getVectorNumElements() : 0;
13974 }
13975
13976 bool DAGCombiner::getTruncatedStoreValue(StoreSDNode *ST, SDValue &Val) {
13977   Val = ST->getValue();
13978   EVT STType = Val.getValueType();
13979   EVT STMemType = ST->getMemoryVT();
13980   if (STType == STMemType)
13981     return true;
13982   if (isTypeLegal(STMemType))
13983     return false; // fail.
13984   if (STType.isFloatingPoint() && STMemType.isFloatingPoint() &&
13985       TLI.isOperationLegal(ISD::FTRUNC, STMemType)) {
13986     Val = DAG.getNode(ISD::FTRUNC, SDLoc(ST), STMemType, Val);
13987     return true;
13988   }
13989   if (numVectorEltsOrZero(STType) == numVectorEltsOrZero(STMemType) &&
13990       STType.isInteger() && STMemType.isInteger()) {
13991     Val = DAG.getNode(ISD::TRUNCATE, SDLoc(ST), STMemType, Val);
13992     return true;
13993   }
13994   if (STType.getSizeInBits() == STMemType.getSizeInBits()) {
13995     Val = DAG.getBitcast(STMemType, Val);
13996     return true;
13997   }
13998   return false; // fail.
13999 }
14000
14001 bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
14002   EVT LDMemType = LD->getMemoryVT();
14003   EVT LDType = LD->getValueType(0);
14004   assert(Val.getValueType() == LDMemType &&
14005          "Attempting to extend value of non-matching type");
14006   if (LDType == LDMemType)
14007     return true;
14008   if (LDMemType.isInteger() && LDType.isInteger()) {
14009     switch (LD->getExtensionType()) {
14010     case ISD::NON_EXTLOAD:
14011       Val = DAG.getBitcast(LDType, Val);
14012       return true;
14013     case ISD::EXTLOAD:
14014       Val = DAG.getNode(ISD::ANY_EXTEND, SDLoc(LD), LDType, Val);
14015       return true;
14016     case ISD::SEXTLOAD:
14017       Val = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(LD), LDType, Val);
14018       return true;
14019     case ISD::ZEXTLOAD:
14020       Val = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(LD), LDType, Val);
14021       return true;
14022     }
14023   }
14024   return false;
14025 }
14026
14027 SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
14028   if (OptLevel == CodeGenOpt::None || LD->isVolatile())
14029     return SDValue();
14030   SDValue Chain = LD->getOperand(0);
14031   StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
14032   if (!ST || ST->isVolatile())
14033     return SDValue();
14034
14035   EVT LDType = LD->getValueType(0);
14036   EVT LDMemType = LD->getMemoryVT();
14037   EVT STMemType = ST->getMemoryVT();
14038   EVT STType = ST->getValue().getValueType();
14039
14040   BaseIndexOffset BasePtrLD = BaseIndexOffset::match(LD, DAG);
14041   BaseIndexOffset BasePtrST = BaseIndexOffset::match(ST, DAG);
14042   int64_t Offset;
14043   if (!BasePtrST.equalBaseIndex(BasePtrLD, DAG, Offset))
14044     return SDValue();
14045
14046   // Normalize for Endianness. After this Offset=0 will denote that the least
14047   // significant bit in the loaded value maps to the least significant bit in
14048   // the stored value). With Offset=n (for n > 0) the loaded value starts at the
14049   // n:th least significant byte of the stored value.
14050   if (DAG.getDataLayout().isBigEndian())
14051     Offset = (STMemType.getStoreSizeInBits() -
14052               LDMemType.getStoreSizeInBits()) / 8 - Offset;
14053
14054   // Check that the stored value cover all bits that are loaded.
14055   bool STCoversLD =
14056       (Offset >= 0) &&
14057       (Offset * 8 + LDMemType.getSizeInBits() <= STMemType.getSizeInBits());
14058
14059   auto ReplaceLd = [&](LoadSDNode *LD, SDValue Val, SDValue Chain) -> SDValue {
14060     if (LD->isIndexed()) {
14061       bool IsSub = (LD->getAddressingMode() == ISD::PRE_DEC ||
14062                     LD->getAddressingMode() == ISD::POST_DEC);
14063       unsigned Opc = IsSub ? ISD::SUB : ISD::ADD;
14064       SDValue Idx = DAG.getNode(Opc, SDLoc(LD), LD->getOperand(1).getValueType(),
14065                              LD->getOperand(1), LD->getOperand(2));
14066       SDValue Ops[] = {Val, Idx, Chain};
14067       return CombineTo(LD, Ops, 3);
14068     }
14069     return CombineTo(LD, Val, Chain);
14070   };
14071
14072   if (!STCoversLD)
14073     return SDValue();
14074
14075   // Memory as copy space (potentially masked).
14076   if (Offset == 0 && LDType == STType && STMemType == LDMemType) {
14077     // Simple case: Direct non-truncating forwarding
14078     if (LDType.getSizeInBits() == LDMemType.getSizeInBits())
14079       return ReplaceLd(LD, ST->getValue(), Chain);
14080     // Can we model the truncate and extension with an and mask?
14081     if (STType.isInteger() && LDMemType.isInteger() && !STType.isVector() &&
14082         !LDMemType.isVector() && LD->getExtensionType() != ISD::SEXTLOAD) {
14083       // Mask to size of LDMemType
14084       auto Mask =
14085           DAG.getConstant(APInt::getLowBitsSet(STType.getSizeInBits(),
14086                                                STMemType.getSizeInBits()),
14087                           SDLoc(ST), STType);
14088       auto Val = DAG.getNode(ISD::AND, SDLoc(LD), LDType, ST->getValue(), Mask);
14089       return ReplaceLd(LD, Val, Chain);
14090     }
14091   }
14092
14093   // TODO: Deal with nonzero offset.
14094   if (LD->getBasePtr().isUndef() || Offset != 0)
14095     return SDValue();
14096   // Model necessary truncations / extenstions.
14097   SDValue Val;
14098   // Truncate Value To Stored Memory Size.
14099   do {
14100     if (!getTruncatedStoreValue(ST, Val))
14101       continue;
14102     if (!isTypeLegal(LDMemType))
14103       continue;
14104     if (STMemType != LDMemType) {
14105       // TODO: Support vectors? This requires extract_subvector/bitcast.
14106       if (!STMemType.isVector() && !LDMemType.isVector() &&
14107           STMemType.isInteger() && LDMemType.isInteger())
14108         Val = DAG.getNode(ISD::TRUNCATE, SDLoc(LD), LDMemType, Val);
14109       else
14110         continue;
14111     }
14112     if (!extendLoadedValueToExtension(LD, Val))
14113       continue;
14114     return ReplaceLd(LD, Val, Chain);
14115   } while (false);
14116
14117   // On failure, cleanup dead nodes we may have created.
14118   if (Val->use_empty())
14119     deleteAndRecombine(Val.getNode());
14120   return SDValue();
14121 }
14122
14123 SDValue DAGCombiner::visitLOAD(SDNode *N) {
14124   LoadSDNode *LD  = cast<LoadSDNode>(N);
14125   SDValue Chain = LD->getChain();
14126   SDValue Ptr   = LD->getBasePtr();
14127
14128   // If load is not volatile and there are no uses of the loaded value (and
14129   // the updated indexed value in case of indexed loads), change uses of the
14130   // chain value into uses of the chain input (i.e. delete the dead load).
14131   if (!LD->isVolatile()) {
14132     if (N->getValueType(1) == MVT::Other) {
14133       // Unindexed loads.
14134       if (!N->hasAnyUseOfValue(0)) {
14135         // It's not safe to use the two value CombineTo variant here. e.g.
14136         // v1, chain2 = load chain1, loc
14137         // v2, chain3 = load chain2, loc
14138         // v3         = add v2, c
14139         // Now we replace use of chain2 with chain1.  This makes the second load
14140         // isomorphic to the one we are deleting, and thus makes this load live.
14141         LLVM_DEBUG(dbgs() << "\nReplacing.6 "; N->dump(&DAG);
14142                    dbgs() << "\nWith chain: "; Chain.getNode()->dump(&DAG);
14143                    dbgs() << "\n");
14144         WorklistRemover DeadNodes(*this);
14145         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14146         AddUsersToWorklist(Chain.getNode());
14147         if (N->use_empty())
14148           deleteAndRecombine(N);
14149
14150         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14151       }
14152     } else {
14153       // Indexed loads.
14154       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
14155
14156       // If this load has an opaque TargetConstant offset, then we cannot split
14157       // the indexing into an add/sub directly (that TargetConstant may not be
14158       // valid for a different type of node, and we cannot convert an opaque
14159       // target constant into a regular constant).
14160       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
14161                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
14162
14163       if (!N->hasAnyUseOfValue(0) &&
14164           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
14165         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
14166         SDValue Index;
14167         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
14168           Index = SplitIndexingFromLoad(LD);
14169           // Try to fold the base pointer arithmetic into subsequent loads and
14170           // stores.
14171           AddUsersToWorklist(N);
14172         } else
14173           Index = DAG.getUNDEF(N->getValueType(1));
14174         LLVM_DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG);
14175                    dbgs() << "\nWith: "; Undef.getNode()->dump(&DAG);
14176                    dbgs() << " and 2 other values\n");
14177         WorklistRemover DeadNodes(*this);
14178         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
14179         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
14180         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
14181         deleteAndRecombine(N);
14182         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
14183       }
14184     }
14185   }
14186
14187   // If this load is directly stored, replace the load value with the stored
14188   // value.
14189   if (auto V = ForwardStoreValueToDirectLoad(LD))
14190     return V;
14191
14192   // Try to infer better alignment information than the load already has.
14193   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
14194     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
14195       if (Align > LD->getAlignment() && LD->getSrcValueOffset() % Align == 0) {
14196         SDValue NewLoad = DAG.getExtLoad(
14197             LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr,
14198             LD->getPointerInfo(), LD->getMemoryVT(), Align,
14199             LD->getMemOperand()->getFlags(), LD->getAAInfo());
14200         // NewLoad will always be N as we are only refining the alignment
14201         assert(NewLoad.getNode() == N);
14202         (void)NewLoad;
14203       }
14204     }
14205   }
14206
14207   if (LD->isUnindexed()) {
14208     // Walk up chain skipping non-aliasing memory nodes.
14209     SDValue BetterChain = FindBetterChain(LD, Chain);
14210
14211     // If there is a better chain.
14212     if (Chain != BetterChain) {
14213       SDValue ReplLoad;
14214
14215       // Replace the chain to void dependency.
14216       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
14217         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
14218                                BetterChain, Ptr, LD->getMemOperand());
14219       } else {
14220         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
14221                                   LD->getValueType(0),
14222                                   BetterChain, Ptr, LD->getMemoryVT(),
14223                                   LD->getMemOperand());
14224       }
14225
14226       // Create token factor to keep old chain connected.
14227       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
14228                                   MVT::Other, Chain, ReplLoad.getValue(1));
14229
14230       // Replace uses with load result and token factor
14231       return CombineTo(N, ReplLoad.getValue(0), Token);
14232     }
14233   }
14234
14235   // Try transforming N to an indexed load.
14236   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
14237     return SDValue(N, 0);
14238
14239   // Try to slice up N to more direct loads if the slices are mapped to
14240   // different register banks or pairing can take place.
14241   if (SliceUpLoad(N))
14242     return SDValue(N, 0);
14243
14244   return SDValue();
14245 }
14246
14247 namespace {
14248
14249 /// Helper structure used to slice a load in smaller loads.
14250 /// Basically a slice is obtained from the following sequence:
14251 /// Origin = load Ty1, Base
14252 /// Shift = srl Ty1 Origin, CstTy Amount
14253 /// Inst = trunc Shift to Ty2
14254 ///
14255 /// Then, it will be rewritten into:
14256 /// Slice = load SliceTy, Base + SliceOffset
14257 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
14258 ///
14259 /// SliceTy is deduced from the number of bits that are actually used to
14260 /// build Inst.
14261 struct LoadedSlice {
14262   /// Helper structure used to compute the cost of a slice.
14263   struct Cost {
14264     /// Are we optimizing for code size.
14265     bool ForCodeSize = false;
14266
14267     /// Various cost.
14268     unsigned Loads = 0;
14269     unsigned Truncates = 0;
14270     unsigned CrossRegisterBanksCopies = 0;
14271     unsigned ZExts = 0;
14272     unsigned Shift = 0;
14273
14274     explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
14275
14276     /// Get the cost of one isolated slice.
14277     Cost(const LoadedSlice &LS, bool ForCodeSize)
14278         : ForCodeSize(ForCodeSize), Loads(1) {
14279       EVT TruncType = LS.Inst->getValueType(0);
14280       EVT LoadedType = LS.getLoadedType();
14281       if (TruncType != LoadedType &&
14282           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
14283         ZExts = 1;
14284     }
14285
14286     /// Account for slicing gain in the current cost.
14287     /// Slicing provide a few gains like removing a shift or a
14288     /// truncate. This method allows to grow the cost of the original
14289     /// load with the gain from this slice.
14290     void addSliceGain(const LoadedSlice &LS) {
14291       // Each slice saves a truncate.
14292       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
14293       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
14294                               LS.Inst->getValueType(0)))
14295         ++Truncates;
14296       // If there is a shift amount, this slice gets rid of it.
14297       if (LS.Shift)
14298         ++Shift;
14299       // If this slice can merge a cross register bank copy, account for it.
14300       if (LS.canMergeExpensiveCrossRegisterBankCopy())
14301         ++CrossRegisterBanksCopies;
14302     }
14303
14304     Cost &operator+=(const Cost &RHS) {
14305       Loads += RHS.Loads;
14306       Truncates += RHS.Truncates;
14307       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
14308       ZExts += RHS.ZExts;
14309       Shift += RHS.Shift;
14310       return *this;
14311     }
14312
14313     bool operator==(const Cost &RHS) const {
14314       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
14315              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
14316              ZExts == RHS.ZExts && Shift == RHS.Shift;
14317     }
14318
14319     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
14320
14321     bool operator<(const Cost &RHS) const {
14322       // Assume cross register banks copies are as expensive as loads.
14323       // FIXME: Do we want some more target hooks?
14324       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
14325       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
14326       // Unless we are optimizing for code size, consider the
14327       // expensive operation first.
14328       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
14329         return ExpensiveOpsLHS < ExpensiveOpsRHS;
14330       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
14331              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
14332     }
14333
14334     bool operator>(const Cost &RHS) const { return RHS < *this; }
14335
14336     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
14337
14338     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
14339   };
14340
14341   // The last instruction that represent the slice. This should be a
14342   // truncate instruction.
14343   SDNode *Inst;
14344
14345   // The original load instruction.
14346   LoadSDNode *Origin;
14347
14348   // The right shift amount in bits from the original load.
14349   unsigned Shift;
14350
14351   // The DAG from which Origin came from.
14352   // This is used to get some contextual information about legal types, etc.
14353   SelectionDAG *DAG;
14354
14355   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
14356               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
14357       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
14358
14359   /// Get the bits used in a chunk of bits \p BitWidth large.
14360   /// \return Result is \p BitWidth and has used bits set to 1 and
14361   ///         not used bits set to 0.
14362   APInt getUsedBits() const {
14363     // Reproduce the trunc(lshr) sequence:
14364     // - Start from the truncated value.
14365     // - Zero extend to the desired bit width.
14366     // - Shift left.
14367     assert(Origin && "No original load to compare against.");
14368     unsigned BitWidth = Origin->getValueSizeInBits(0);
14369     assert(Inst && "This slice is not bound to an instruction");
14370     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
14371            "Extracted slice is bigger than the whole type!");
14372     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
14373     UsedBits.setAllBits();
14374     UsedBits = UsedBits.zext(BitWidth);
14375     UsedBits <<= Shift;
14376     return UsedBits;
14377   }
14378
14379   /// Get the size of the slice to be loaded in bytes.
14380   unsigned getLoadedSize() const {
14381     unsigned SliceSize = getUsedBits().countPopulation();
14382     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
14383     return SliceSize / 8;
14384   }
14385
14386   /// Get the type that will be loaded for this slice.
14387   /// Note: This may not be the final type for the slice.
14388   EVT getLoadedType() const {
14389     assert(DAG && "Missing context");
14390     LLVMContext &Ctxt = *DAG->getContext();
14391     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
14392   }
14393
14394   /// Get the alignment of the load used for this slice.
14395   unsigned getAlignment() const {
14396     unsigned Alignment = Origin->getAlignment();
14397     uint64_t Offset = getOffsetFromBase();
14398     if (Offset != 0)
14399       Alignment = MinAlign(Alignment, Alignment + Offset);
14400     return Alignment;
14401   }
14402
14403   /// Check if this slice can be rewritten with legal operations.
14404   bool isLegal() const {
14405     // An invalid slice is not legal.
14406     if (!Origin || !Inst || !DAG)
14407       return false;
14408
14409     // Offsets are for indexed load only, we do not handle that.
14410     if (!Origin->getOffset().isUndef())
14411       return false;
14412
14413     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14414
14415     // Check that the type is legal.
14416     EVT SliceType = getLoadedType();
14417     if (!TLI.isTypeLegal(SliceType))
14418       return false;
14419
14420     // Check that the load is legal for this type.
14421     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
14422       return false;
14423
14424     // Check that the offset can be computed.
14425     // 1. Check its type.
14426     EVT PtrType = Origin->getBasePtr().getValueType();
14427     if (PtrType == MVT::Untyped || PtrType.isExtended())
14428       return false;
14429
14430     // 2. Check that it fits in the immediate.
14431     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
14432       return false;
14433
14434     // 3. Check that the computation is legal.
14435     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
14436       return false;
14437
14438     // Check that the zext is legal if it needs one.
14439     EVT TruncateType = Inst->getValueType(0);
14440     if (TruncateType != SliceType &&
14441         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
14442       return false;
14443
14444     return true;
14445   }
14446
14447   /// Get the offset in bytes of this slice in the original chunk of
14448   /// bits.
14449   /// \pre DAG != nullptr.
14450   uint64_t getOffsetFromBase() const {
14451     assert(DAG && "Missing context.");
14452     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
14453     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
14454     uint64_t Offset = Shift / 8;
14455     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
14456     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
14457            "The size of the original loaded type is not a multiple of a"
14458            " byte.");
14459     // If Offset is bigger than TySizeInBytes, it means we are loading all
14460     // zeros. This should have been optimized before in the process.
14461     assert(TySizeInBytes > Offset &&
14462            "Invalid shift amount for given loaded size");
14463     if (IsBigEndian)
14464       Offset = TySizeInBytes - Offset - getLoadedSize();
14465     return Offset;
14466   }
14467
14468   /// Generate the sequence of instructions to load the slice
14469   /// represented by this object and redirect the uses of this slice to
14470   /// this new sequence of instructions.
14471   /// \pre this->Inst && this->Origin are valid Instructions and this
14472   /// object passed the legal check: LoadedSlice::isLegal returned true.
14473   /// \return The last instruction of the sequence used to load the slice.
14474   SDValue loadSlice() const {
14475     assert(Inst && Origin && "Unable to replace a non-existing slice.");
14476     const SDValue &OldBaseAddr = Origin->getBasePtr();
14477     SDValue BaseAddr = OldBaseAddr;
14478     // Get the offset in that chunk of bytes w.r.t. the endianness.
14479     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
14480     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
14481     if (Offset) {
14482       // BaseAddr = BaseAddr + Offset.
14483       EVT ArithType = BaseAddr.getValueType();
14484       SDLoc DL(Origin);
14485       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
14486                               DAG->getConstant(Offset, DL, ArithType));
14487     }
14488
14489     // Create the type of the loaded slice according to its size.
14490     EVT SliceType = getLoadedType();
14491
14492     // Create the load for the slice.
14493     SDValue LastInst =
14494         DAG->getLoad(SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
14495                      Origin->getPointerInfo().getWithOffset(Offset),
14496                      getAlignment(), Origin->getMemOperand()->getFlags());
14497     // If the final type is not the same as the loaded type, this means that
14498     // we have to pad with zero. Create a zero extend for that.
14499     EVT FinalType = Inst->getValueType(0);
14500     if (SliceType != FinalType)
14501       LastInst =
14502           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
14503     return LastInst;
14504   }
14505
14506   /// Check if this slice can be merged with an expensive cross register
14507   /// bank copy. E.g.,
14508   /// i = load i32
14509   /// f = bitcast i32 i to float
14510   bool canMergeExpensiveCrossRegisterBankCopy() const {
14511     if (!Inst || !Inst->hasOneUse())
14512       return false;
14513     SDNode *Use = *Inst->use_begin();
14514     if (Use->getOpcode() != ISD::BITCAST)
14515       return false;
14516     assert(DAG && "Missing context");
14517     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
14518     EVT ResVT = Use->getValueType(0);
14519     const TargetRegisterClass *ResRC =
14520         TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
14521     const TargetRegisterClass *ArgRC =
14522         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
14523                            Use->getOperand(0)->isDivergent());
14524     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
14525       return false;
14526
14527     // At this point, we know that we perform a cross-register-bank copy.
14528     // Check if it is expensive.
14529     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
14530     // Assume bitcasts are cheap, unless both register classes do not
14531     // explicitly share a common sub class.
14532     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
14533       return false;
14534
14535     // Check if it will be merged with the load.
14536     // 1. Check the alignment constraint.
14537     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
14538         ResVT.getTypeForEVT(*DAG->getContext()));
14539
14540     if (RequiredAlignment > getAlignment())
14541       return false;
14542
14543     // 2. Check that the load is a legal operation for that type.
14544     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
14545       return false;
14546
14547     // 3. Check that we do not have a zext in the way.
14548     if (Inst->getValueType(0) != getLoadedType())
14549       return false;
14550
14551     return true;
14552   }
14553 };
14554
14555 } // end anonymous namespace
14556
14557 /// Check that all bits set in \p UsedBits form a dense region, i.e.,
14558 /// \p UsedBits looks like 0..0 1..1 0..0.
14559 static bool areUsedBitsDense(const APInt &UsedBits) {
14560   // If all the bits are one, this is dense!
14561   if (UsedBits.isAllOnesValue())
14562     return true;
14563
14564   // Get rid of the unused bits on the right.
14565   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
14566   // Get rid of the unused bits on the left.
14567   if (NarrowedUsedBits.countLeadingZeros())
14568     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
14569   // Check that the chunk of bits is completely used.
14570   return NarrowedUsedBits.isAllOnesValue();
14571 }
14572
14573 /// Check whether or not \p First and \p Second are next to each other
14574 /// in memory. This means that there is no hole between the bits loaded
14575 /// by \p First and the bits loaded by \p Second.
14576 static bool areSlicesNextToEachOther(const LoadedSlice &First,
14577                                      const LoadedSlice &Second) {
14578   assert(First.Origin == Second.Origin && First.Origin &&
14579          "Unable to match different memory origins.");
14580   APInt UsedBits = First.getUsedBits();
14581   assert((UsedBits & Second.getUsedBits()) == 0 &&
14582          "Slices are not supposed to overlap.");
14583   UsedBits |= Second.getUsedBits();
14584   return areUsedBitsDense(UsedBits);
14585 }
14586
14587 /// Adjust the \p GlobalLSCost according to the target
14588 /// paring capabilities and the layout of the slices.
14589 /// \pre \p GlobalLSCost should account for at least as many loads as
14590 /// there is in the slices in \p LoadedSlices.
14591 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14592                                  LoadedSlice::Cost &GlobalLSCost) {
14593   unsigned NumberOfSlices = LoadedSlices.size();
14594   // If there is less than 2 elements, no pairing is possible.
14595   if (NumberOfSlices < 2)
14596     return;
14597
14598   // Sort the slices so that elements that are likely to be next to each
14599   // other in memory are next to each other in the list.
14600   llvm::sort(LoadedSlices, [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
14601     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
14602     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
14603   });
14604   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
14605   // First (resp. Second) is the first (resp. Second) potentially candidate
14606   // to be placed in a paired load.
14607   const LoadedSlice *First = nullptr;
14608   const LoadedSlice *Second = nullptr;
14609   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
14610                 // Set the beginning of the pair.
14611                                                            First = Second) {
14612     Second = &LoadedSlices[CurrSlice];
14613
14614     // If First is NULL, it means we start a new pair.
14615     // Get to the next slice.
14616     if (!First)
14617       continue;
14618
14619     EVT LoadedType = First->getLoadedType();
14620
14621     // If the types of the slices are different, we cannot pair them.
14622     if (LoadedType != Second->getLoadedType())
14623       continue;
14624
14625     // Check if the target supplies paired loads for this type.
14626     unsigned RequiredAlignment = 0;
14627     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
14628       // move to the next pair, this type is hopeless.
14629       Second = nullptr;
14630       continue;
14631     }
14632     // Check if we meet the alignment requirement.
14633     if (RequiredAlignment > First->getAlignment())
14634       continue;
14635
14636     // Check that both loads are next to each other in memory.
14637     if (!areSlicesNextToEachOther(*First, *Second))
14638       continue;
14639
14640     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
14641     --GlobalLSCost.Loads;
14642     // Move to the next pair.
14643     Second = nullptr;
14644   }
14645 }
14646
14647 /// Check the profitability of all involved LoadedSlice.
14648 /// Currently, it is considered profitable if there is exactly two
14649 /// involved slices (1) which are (2) next to each other in memory, and
14650 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
14651 ///
14652 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
14653 /// the elements themselves.
14654 ///
14655 /// FIXME: When the cost model will be mature enough, we can relax
14656 /// constraints (1) and (2).
14657 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
14658                                 const APInt &UsedBits, bool ForCodeSize) {
14659   unsigned NumberOfSlices = LoadedSlices.size();
14660   if (StressLoadSlicing)
14661     return NumberOfSlices > 1;
14662
14663   // Check (1).
14664   if (NumberOfSlices != 2)
14665     return false;
14666
14667   // Check (2).
14668   if (!areUsedBitsDense(UsedBits))
14669     return false;
14670
14671   // Check (3).
14672   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
14673   // The original code has one big load.
14674   OrigCost.Loads = 1;
14675   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
14676     const LoadedSlice &LS = LoadedSlices[CurrSlice];
14677     // Accumulate the cost of all the slices.
14678     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
14679     GlobalSlicingCost += SliceCost;
14680
14681     // Account as cost in the original configuration the gain obtained
14682     // with the current slices.
14683     OrigCost.addSliceGain(LS);
14684   }
14685
14686   // If the target supports paired load, adjust the cost accordingly.
14687   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
14688   return OrigCost > GlobalSlicingCost;
14689 }
14690
14691 /// If the given load, \p LI, is used only by trunc or trunc(lshr)
14692 /// operations, split it in the various pieces being extracted.
14693 ///
14694 /// This sort of thing is introduced by SROA.
14695 /// This slicing takes care not to insert overlapping loads.
14696 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
14697 bool DAGCombiner::SliceUpLoad(SDNode *N) {
14698   if (Level < AfterLegalizeDAG)
14699     return false;
14700
14701   LoadSDNode *LD = cast<LoadSDNode>(N);
14702   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
14703       !LD->getValueType(0).isInteger())
14704     return false;
14705
14706   // Keep track of already used bits to detect overlapping values.
14707   // In that case, we will just abort the transformation.
14708   APInt UsedBits(LD->getValueSizeInBits(0), 0);
14709
14710   SmallVector<LoadedSlice, 4> LoadedSlices;
14711
14712   // Check if this load is used as several smaller chunks of bits.
14713   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
14714   // of computation for each trunc.
14715   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
14716        UI != UIEnd; ++UI) {
14717     // Skip the uses of the chain.
14718     if (UI.getUse().getResNo() != 0)
14719       continue;
14720
14721     SDNode *User = *UI;
14722     unsigned Shift = 0;
14723
14724     // Check if this is a trunc(lshr).
14725     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
14726         isa<ConstantSDNode>(User->getOperand(1))) {
14727       Shift = User->getConstantOperandVal(1);
14728       User = *User->use_begin();
14729     }
14730
14731     // At this point, User is a Truncate, iff we encountered, trunc or
14732     // trunc(lshr).
14733     if (User->getOpcode() != ISD::TRUNCATE)
14734       return false;
14735
14736     // The width of the type must be a power of 2 and greater than 8-bits.
14737     // Otherwise the load cannot be represented in LLVM IR.
14738     // Moreover, if we shifted with a non-8-bits multiple, the slice
14739     // will be across several bytes. We do not support that.
14740     unsigned Width = User->getValueSizeInBits(0);
14741     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
14742       return false;
14743
14744     // Build the slice for this chain of computations.
14745     LoadedSlice LS(User, LD, Shift, &DAG);
14746     APInt CurrentUsedBits = LS.getUsedBits();
14747
14748     // Check if this slice overlaps with another.
14749     if ((CurrentUsedBits & UsedBits) != 0)
14750       return false;
14751     // Update the bits used globally.
14752     UsedBits |= CurrentUsedBits;
14753
14754     // Check if the new slice would be legal.
14755     if (!LS.isLegal())
14756       return false;
14757
14758     // Record the slice.
14759     LoadedSlices.push_back(LS);
14760   }
14761
14762   // Abort slicing if it does not seem to be profitable.
14763   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
14764     return false;
14765
14766   ++SlicedLoads;
14767
14768   // Rewrite each chain to use an independent load.
14769   // By construction, each chain can be represented by a unique load.
14770
14771   // Prepare the argument for the new token factor for all the slices.
14772   SmallVector<SDValue, 8> ArgChains;
14773   for (SmallVectorImpl<LoadedSlice>::const_iterator
14774            LSIt = LoadedSlices.begin(),
14775            LSItEnd = LoadedSlices.end();
14776        LSIt != LSItEnd; ++LSIt) {
14777     SDValue SliceInst = LSIt->loadSlice();
14778     CombineTo(LSIt->Inst, SliceInst, true);
14779     if (SliceInst.getOpcode() != ISD::LOAD)
14780       SliceInst = SliceInst.getOperand(0);
14781     assert(SliceInst->getOpcode() == ISD::LOAD &&
14782            "It takes more than a zext to get to the loaded slice!!");
14783     ArgChains.push_back(SliceInst.getValue(1));
14784   }
14785
14786   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
14787                               ArgChains);
14788   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
14789   AddToWorklist(Chain.getNode());
14790   return true;
14791 }
14792
14793 /// Check to see if V is (and load (ptr), imm), where the load is having
14794 /// specific bytes cleared out.  If so, return the byte size being masked out
14795 /// and the shift amount.
14796 static std::pair<unsigned, unsigned>
14797 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
14798   std::pair<unsigned, unsigned> Result(0, 0);
14799
14800   // Check for the structure we're looking for.
14801   if (V->getOpcode() != ISD::AND ||
14802       !isa<ConstantSDNode>(V->getOperand(1)) ||
14803       !ISD::isNormalLoad(V->getOperand(0).getNode()))
14804     return Result;
14805
14806   // Check the chain and pointer.
14807   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
14808   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
14809
14810   // This only handles simple types.
14811   if (V.getValueType() != MVT::i16 &&
14812       V.getValueType() != MVT::i32 &&
14813       V.getValueType() != MVT::i64)
14814     return Result;
14815
14816   // Check the constant mask.  Invert it so that the bits being masked out are
14817   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
14818   // follow the sign bit for uniformity.
14819   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
14820   unsigned NotMaskLZ = countLeadingZeros(NotMask);
14821   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
14822   unsigned NotMaskTZ = countTrailingZeros(NotMask);
14823   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
14824   if (NotMaskLZ == 64) return Result;  // All zero mask.
14825
14826   // See if we have a continuous run of bits.  If so, we have 0*1+0*
14827   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
14828     return Result;
14829
14830   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
14831   if (V.getValueType() != MVT::i64 && NotMaskLZ)
14832     NotMaskLZ -= 64-V.getValueSizeInBits();
14833
14834   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
14835   switch (MaskedBytes) {
14836   case 1:
14837   case 2:
14838   case 4: break;
14839   default: return Result; // All one mask, or 5-byte mask.
14840   }
14841
14842   // Verify that the first bit starts at a multiple of mask so that the access
14843   // is aligned the same as the access width.
14844   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
14845
14846   // For narrowing to be valid, it must be the case that the load the
14847   // immediately preceding memory operation before the store.
14848   if (LD == Chain.getNode())
14849     ; // ok.
14850   else if (Chain->getOpcode() == ISD::TokenFactor &&
14851            SDValue(LD, 1).hasOneUse()) {
14852     // LD has only 1 chain use so they are no indirect dependencies.
14853     if (!LD->isOperandOf(Chain.getNode()))
14854       return Result;
14855   } else
14856     return Result; // Fail.
14857
14858   Result.first = MaskedBytes;
14859   Result.second = NotMaskTZ/8;
14860   return Result;
14861 }
14862
14863 /// Check to see if IVal is something that provides a value as specified by
14864 /// MaskInfo. If so, replace the specified store with a narrower store of
14865 /// truncated IVal.
14866 static SDValue
14867 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
14868                                 SDValue IVal, StoreSDNode *St,
14869                                 DAGCombiner *DC) {
14870   unsigned NumBytes = MaskInfo.first;
14871   unsigned ByteShift = MaskInfo.second;
14872   SelectionDAG &DAG = DC->getDAG();
14873
14874   // Check to see if IVal is all zeros in the part being masked in by the 'or'
14875   // that uses this.  If not, this is not a replacement.
14876   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
14877                                   ByteShift*8, (ByteShift+NumBytes)*8);
14878   if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
14879
14880   // Check that it is legal on the target to do this.  It is legal if the new
14881   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
14882   // legalization (and the target doesn't explicitly think this is a bad idea).
14883   MVT VT = MVT::getIntegerVT(NumBytes * 8);
14884   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
14885   if (!DC->isTypeLegal(VT))
14886     return SDValue();
14887   if (St->getMemOperand() &&
14888       !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
14889                               *St->getMemOperand()))
14890     return SDValue();
14891
14892   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
14893   // shifted by ByteShift and truncated down to NumBytes.
14894   if (ByteShift) {
14895     SDLoc DL(IVal);
14896     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
14897                        DAG.getConstant(ByteShift*8, DL,
14898                                     DC->getShiftAmountTy(IVal.getValueType())));
14899   }
14900
14901   // Figure out the offset for the store and the alignment of the access.
14902   unsigned StOffset;
14903   unsigned NewAlign = St->getAlignment();
14904
14905   if (DAG.getDataLayout().isLittleEndian())
14906     StOffset = ByteShift;
14907   else
14908     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
14909
14910   SDValue Ptr = St->getBasePtr();
14911   if (StOffset) {
14912     SDLoc DL(IVal);
14913     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
14914                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
14915     NewAlign = MinAlign(NewAlign, StOffset);
14916   }
14917
14918   // Truncate down to the new size.
14919   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
14920
14921   ++OpsNarrowed;
14922   return DAG
14923       .getStore(St->getChain(), SDLoc(St), IVal, Ptr,
14924                 St->getPointerInfo().getWithOffset(StOffset), NewAlign);
14925 }
14926
14927 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
14928 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
14929 /// narrowing the load and store if it would end up being a win for performance
14930 /// or code size.
14931 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
14932   StoreSDNode *ST  = cast<StoreSDNode>(N);
14933   if (ST->isVolatile())
14934     return SDValue();
14935
14936   SDValue Chain = ST->getChain();
14937   SDValue Value = ST->getValue();
14938   SDValue Ptr   = ST->getBasePtr();
14939   EVT VT = Value.getValueType();
14940
14941   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
14942     return SDValue();
14943
14944   unsigned Opc = Value.getOpcode();
14945
14946   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
14947   // is a byte mask indicating a consecutive number of bytes, check to see if
14948   // Y is known to provide just those bytes.  If so, we try to replace the
14949   // load + replace + store sequence with a single (narrower) store, which makes
14950   // the load dead.
14951   if (Opc == ISD::OR) {
14952     std::pair<unsigned, unsigned> MaskedLoad;
14953     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
14954     if (MaskedLoad.first)
14955       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14956                                                   Value.getOperand(1), ST,this))
14957         return NewST;
14958
14959     // Or is commutative, so try swapping X and Y.
14960     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
14961     if (MaskedLoad.first)
14962       if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
14963                                                   Value.getOperand(0), ST,this))
14964         return NewST;
14965   }
14966
14967   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
14968       Value.getOperand(1).getOpcode() != ISD::Constant)
14969     return SDValue();
14970
14971   SDValue N0 = Value.getOperand(0);
14972   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
14973       Chain == SDValue(N0.getNode(), 1)) {
14974     LoadSDNode *LD = cast<LoadSDNode>(N0);
14975     if (LD->getBasePtr() != Ptr ||
14976         LD->getPointerInfo().getAddrSpace() !=
14977         ST->getPointerInfo().getAddrSpace())
14978       return SDValue();
14979
14980     // Find the type to narrow it the load / op / store to.
14981     SDValue N1 = Value.getOperand(1);
14982     unsigned BitWidth = N1.getValueSizeInBits();
14983     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
14984     if (Opc == ISD::AND)
14985       Imm ^= APInt::getAllOnesValue(BitWidth);
14986     if (Imm == 0 || Imm.isAllOnesValue())
14987       return SDValue();
14988     unsigned ShAmt = Imm.countTrailingZeros();
14989     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
14990     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
14991     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
14992     // The narrowing should be profitable, the load/store operation should be
14993     // legal (or custom) and the store size should be equal to the NewVT width.
14994     while (NewBW < BitWidth &&
14995            (NewVT.getStoreSizeInBits() != NewBW ||
14996             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
14997             !TLI.isNarrowingProfitable(VT, NewVT))) {
14998       NewBW = NextPowerOf2(NewBW);
14999       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
15000     }
15001     if (NewBW >= BitWidth)
15002       return SDValue();
15003
15004     // If the lsb changed does not start at the type bitwidth boundary,
15005     // start at the previous one.
15006     if (ShAmt % NewBW)
15007       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
15008     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
15009                                    std::min(BitWidth, ShAmt + NewBW));
15010     if ((Imm & Mask) == Imm) {
15011       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
15012       if (Opc == ISD::AND)
15013         NewImm ^= APInt::getAllOnesValue(NewBW);
15014       uint64_t PtrOff = ShAmt / 8;
15015       // For big endian targets, we need to adjust the offset to the pointer to
15016       // load the correct bytes.
15017       if (DAG.getDataLayout().isBigEndian())
15018         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
15019
15020       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
15021       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
15022       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
15023         return SDValue();
15024
15025       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
15026                                    Ptr.getValueType(), Ptr,
15027                                    DAG.getConstant(PtrOff, SDLoc(LD),
15028                                                    Ptr.getValueType()));
15029       SDValue NewLD =
15030           DAG.getLoad(NewVT, SDLoc(N0), LD->getChain(), NewPtr,
15031                       LD->getPointerInfo().getWithOffset(PtrOff), NewAlign,
15032                       LD->getMemOperand()->getFlags(), LD->getAAInfo());
15033       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
15034                                    DAG.getConstant(NewImm, SDLoc(Value),
15035                                                    NewVT));
15036       SDValue NewST =
15037           DAG.getStore(Chain, SDLoc(N), NewVal, NewPtr,
15038                        ST->getPointerInfo().getWithOffset(PtrOff), NewAlign);
15039
15040       AddToWorklist(NewPtr.getNode());
15041       AddToWorklist(NewLD.getNode());
15042       AddToWorklist(NewVal.getNode());
15043       WorklistRemover DeadNodes(*this);
15044       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
15045       ++OpsNarrowed;
15046       return NewST;
15047     }
15048   }
15049
15050   return SDValue();
15051 }
15052
15053 /// For a given floating point load / store pair, if the load value isn't used
15054 /// by any other operations, then consider transforming the pair to integer
15055 /// load / store operations if the target deems the transformation profitable.
15056 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
15057   StoreSDNode *ST  = cast<StoreSDNode>(N);
15058   SDValue Value = ST->getValue();
15059   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
15060       Value.hasOneUse()) {
15061     LoadSDNode *LD = cast<LoadSDNode>(Value);
15062     EVT VT = LD->getMemoryVT();
15063     if (!VT.isFloatingPoint() ||
15064         VT != ST->getMemoryVT() ||
15065         LD->isNonTemporal() ||
15066         ST->isNonTemporal() ||
15067         LD->getPointerInfo().getAddrSpace() != 0 ||
15068         ST->getPointerInfo().getAddrSpace() != 0)
15069       return SDValue();
15070
15071     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
15072     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
15073         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
15074         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
15075         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
15076       return SDValue();
15077
15078     unsigned LDAlign = LD->getAlignment();
15079     unsigned STAlign = ST->getAlignment();
15080     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
15081     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
15082     if (LDAlign < ABIAlign || STAlign < ABIAlign)
15083       return SDValue();
15084
15085     SDValue NewLD =
15086         DAG.getLoad(IntVT, SDLoc(Value), LD->getChain(), LD->getBasePtr(),
15087                     LD->getPointerInfo(), LDAlign);
15088
15089     SDValue NewST =
15090         DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
15091                      ST->getPointerInfo(), STAlign);
15092
15093     AddToWorklist(NewLD.getNode());
15094     AddToWorklist(NewST.getNode());
15095     WorklistRemover DeadNodes(*this);
15096     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
15097     ++LdStFP2Int;
15098     return NewST;
15099   }
15100
15101   return SDValue();
15102 }
15103
15104 // This is a helper function for visitMUL to check the profitability
15105 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
15106 // MulNode is the original multiply, AddNode is (add x, c1),
15107 // and ConstNode is c2.
15108 //
15109 // If the (add x, c1) has multiple uses, we could increase
15110 // the number of adds if we make this transformation.
15111 // It would only be worth doing this if we can remove a
15112 // multiply in the process. Check for that here.
15113 // To illustrate:
15114 //     (A + c1) * c3
15115 //     (A + c2) * c3
15116 // We're checking for cases where we have common "c3 * A" expressions.
15117 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
15118                                               SDValue &AddNode,
15119                                               SDValue &ConstNode) {
15120   APInt Val;
15121
15122   // If the add only has one use, this would be OK to do.
15123   if (AddNode.getNode()->hasOneUse())
15124     return true;
15125
15126   // Walk all the users of the constant with which we're multiplying.
15127   for (SDNode *Use : ConstNode->uses()) {
15128     if (Use == MulNode) // This use is the one we're on right now. Skip it.
15129       continue;
15130
15131     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
15132       SDNode *OtherOp;
15133       SDNode *MulVar = AddNode.getOperand(0).getNode();
15134
15135       // OtherOp is what we're multiplying against the constant.
15136       if (Use->getOperand(0) == ConstNode)
15137         OtherOp = Use->getOperand(1).getNode();
15138       else
15139         OtherOp = Use->getOperand(0).getNode();
15140
15141       // Check to see if multiply is with the same operand of our "add".
15142       //
15143       //     ConstNode  = CONST
15144       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
15145       //     ...
15146       //     AddNode  = (A + c1)  <-- MulVar is A.
15147       //         = AddNode * ConstNode   <-- current visiting instruction.
15148       //
15149       // If we make this transformation, we will have a common
15150       // multiply (ConstNode * A) that we can save.
15151       if (OtherOp == MulVar)
15152         return true;
15153
15154       // Now check to see if a future expansion will give us a common
15155       // multiply.
15156       //
15157       //     ConstNode  = CONST
15158       //     AddNode    = (A + c1)
15159       //     ...   = AddNode * ConstNode <-- current visiting instruction.
15160       //     ...
15161       //     OtherOp = (A + c2)
15162       //     Use     = OtherOp * ConstNode <-- visiting Use.
15163       //
15164       // If we make this transformation, we will have a common
15165       // multiply (CONST * A) after we also do the same transformation
15166       // to the "t2" instruction.
15167       if (OtherOp->getOpcode() == ISD::ADD &&
15168           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
15169           OtherOp->getOperand(0).getNode() == MulVar)
15170         return true;
15171     }
15172   }
15173
15174   // Didn't find a case where this would be profitable.
15175   return false;
15176 }
15177
15178 SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
15179                                          unsigned NumStores) {
15180   SmallVector<SDValue, 8> Chains;
15181   SmallPtrSet<const SDNode *, 8> Visited;
15182   SDLoc StoreDL(StoreNodes[0].MemNode);
15183
15184   for (unsigned i = 0; i < NumStores; ++i) {
15185     Visited.insert(StoreNodes[i].MemNode);
15186   }
15187
15188   // don't include nodes that are children or repeated nodes.
15189   for (unsigned i = 0; i < NumStores; ++i) {
15190     if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
15191       Chains.push_back(StoreNodes[i].MemNode->getChain());
15192   }
15193
15194   assert(Chains.size() > 0 && "Chain should have generated a chain");
15195   return DAG.getTokenFactor(StoreDL, Chains);
15196 }
15197
15198 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
15199     SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
15200     bool IsConstantSrc, bool UseVector, bool UseTrunc) {
15201   // Make sure we have something to merge.
15202   if (NumStores < 2)
15203     return false;
15204
15205   // The latest Node in the DAG.
15206   SDLoc DL(StoreNodes[0].MemNode);
15207
15208   int64_t ElementSizeBits = MemVT.getStoreSizeInBits();
15209   unsigned SizeInBits = NumStores * ElementSizeBits;
15210   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15211
15212   EVT StoreTy;
15213   if (UseVector) {
15214     unsigned Elts = NumStores * NumMemElts;
15215     // Get the type for the merged vector store.
15216     StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15217   } else
15218     StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
15219
15220   SDValue StoredVal;
15221   if (UseVector) {
15222     if (IsConstantSrc) {
15223       SmallVector<SDValue, 8> BuildVector;
15224       for (unsigned I = 0; I != NumStores; ++I) {
15225         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[I].MemNode);
15226         SDValue Val = St->getValue();
15227         // If constant is of the wrong type, convert it now.
15228         if (MemVT != Val.getValueType()) {
15229           Val = peekThroughBitcasts(Val);
15230           // Deal with constants of wrong size.
15231           if (ElementSizeBits != Val.getValueSizeInBits()) {
15232             EVT IntMemVT =
15233                 EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
15234             if (isa<ConstantFPSDNode>(Val)) {
15235               // Not clear how to truncate FP values.
15236               return false;
15237             } else if (auto *C = dyn_cast<ConstantSDNode>(Val))
15238               Val = DAG.getConstant(C->getAPIntValue()
15239                                         .zextOrTrunc(Val.getValueSizeInBits())
15240                                         .zextOrTrunc(ElementSizeBits),
15241                                     SDLoc(C), IntMemVT);
15242           }
15243           // Make sure correctly size type is the correct type.
15244           Val = DAG.getBitcast(MemVT, Val);
15245         }
15246         BuildVector.push_back(Val);
15247       }
15248       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15249                                                : ISD::BUILD_VECTOR,
15250                               DL, StoreTy, BuildVector);
15251     } else {
15252       SmallVector<SDValue, 8> Ops;
15253       for (unsigned i = 0; i < NumStores; ++i) {
15254         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15255         SDValue Val = peekThroughBitcasts(St->getValue());
15256         // All operands of BUILD_VECTOR / CONCAT_VECTOR must be of
15257         // type MemVT. If the underlying value is not the correct
15258         // type, but it is an extraction of an appropriate vector we
15259         // can recast Val to be of the correct type. This may require
15260         // converting between EXTRACT_VECTOR_ELT and
15261         // EXTRACT_SUBVECTOR.
15262         if ((MemVT != Val.getValueType()) &&
15263             (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15264              Val.getOpcode() == ISD::EXTRACT_SUBVECTOR)) {
15265           EVT MemVTScalarTy = MemVT.getScalarType();
15266           // We may need to add a bitcast here to get types to line up.
15267           if (MemVTScalarTy != Val.getValueType().getScalarType()) {
15268             Val = DAG.getBitcast(MemVT, Val);
15269           } else {
15270             unsigned OpC = MemVT.isVector() ? ISD::EXTRACT_SUBVECTOR
15271                                             : ISD::EXTRACT_VECTOR_ELT;
15272             SDValue Vec = Val.getOperand(0);
15273             SDValue Idx = Val.getOperand(1);
15274             Val = DAG.getNode(OpC, SDLoc(Val), MemVT, Vec, Idx);
15275           }
15276         }
15277         Ops.push_back(Val);
15278       }
15279
15280       // Build the extracted vector elements back into a vector.
15281       StoredVal = DAG.getNode(MemVT.isVector() ? ISD::CONCAT_VECTORS
15282                                                : ISD::BUILD_VECTOR,
15283                               DL, StoreTy, Ops);
15284     }
15285   } else {
15286     // We should always use a vector store when merging extracted vector
15287     // elements, so this path implies a store of constants.
15288     assert(IsConstantSrc && "Merged vector elements should use vector store");
15289
15290     APInt StoreInt(SizeInBits, 0);
15291
15292     // Construct a single integer constant which is made of the smaller
15293     // constant inputs.
15294     bool IsLE = DAG.getDataLayout().isLittleEndian();
15295     for (unsigned i = 0; i < NumStores; ++i) {
15296       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
15297       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
15298
15299       SDValue Val = St->getValue();
15300       Val = peekThroughBitcasts(Val);
15301       StoreInt <<= ElementSizeBits;
15302       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
15303         StoreInt |= C->getAPIntValue()
15304                         .zextOrTrunc(ElementSizeBits)
15305                         .zextOrTrunc(SizeInBits);
15306       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
15307         StoreInt |= C->getValueAPF()
15308                         .bitcastToAPInt()
15309                         .zextOrTrunc(ElementSizeBits)
15310                         .zextOrTrunc(SizeInBits);
15311         // If fp truncation is necessary give up for now.
15312         if (MemVT.getSizeInBits() != ElementSizeBits)
15313           return false;
15314       } else {
15315         llvm_unreachable("Invalid constant element type");
15316       }
15317     }
15318
15319     // Create the new Load and Store operations.
15320     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
15321   }
15322
15323   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15324   SDValue NewChain = getMergeStoreChains(StoreNodes, NumStores);
15325
15326   // make sure we use trunc store if it's necessary to be legal.
15327   SDValue NewStore;
15328   if (!UseTrunc) {
15329     NewStore = DAG.getStore(NewChain, DL, StoredVal, FirstInChain->getBasePtr(),
15330                             FirstInChain->getPointerInfo(),
15331                             FirstInChain->getAlignment());
15332   } else { // Must be realized as a trunc store
15333     EVT LegalizedStoredValTy =
15334         TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
15335     unsigned LegalizedStoreSize = LegalizedStoredValTy.getSizeInBits();
15336     ConstantSDNode *C = cast<ConstantSDNode>(StoredVal);
15337     SDValue ExtendedStoreVal =
15338         DAG.getConstant(C->getAPIntValue().zextOrTrunc(LegalizedStoreSize), DL,
15339                         LegalizedStoredValTy);
15340     NewStore = DAG.getTruncStore(
15341         NewChain, DL, ExtendedStoreVal, FirstInChain->getBasePtr(),
15342         FirstInChain->getPointerInfo(), StoredVal.getValueType() /*TVT*/,
15343         FirstInChain->getAlignment(),
15344         FirstInChain->getMemOperand()->getFlags());
15345   }
15346
15347   // Replace all merged stores with the new store.
15348   for (unsigned i = 0; i < NumStores; ++i)
15349     CombineTo(StoreNodes[i].MemNode, NewStore);
15350
15351   AddToWorklist(NewChain.getNode());
15352   return true;
15353 }
15354
15355 void DAGCombiner::getStoreMergeCandidates(
15356     StoreSDNode *St, SmallVectorImpl<MemOpLink> &StoreNodes,
15357     SDNode *&RootNode) {
15358   // This holds the base pointer, index, and the offset in bytes from the base
15359   // pointer.
15360   BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
15361   EVT MemVT = St->getMemoryVT();
15362
15363   SDValue Val = peekThroughBitcasts(St->getValue());
15364   // We must have a base and an offset.
15365   if (!BasePtr.getBase().getNode())
15366     return;
15367
15368   // Do not handle stores to undef base pointers.
15369   if (BasePtr.getBase().isUndef())
15370     return;
15371
15372   bool IsConstantSrc = isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val);
15373   bool IsExtractVecSrc = (Val.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15374                           Val.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15375   bool IsLoadSrc = isa<LoadSDNode>(Val);
15376   BaseIndexOffset LBasePtr;
15377   // Match on loadbaseptr if relevant.
15378   EVT LoadVT;
15379   if (IsLoadSrc) {
15380     auto *Ld = cast<LoadSDNode>(Val);
15381     LBasePtr = BaseIndexOffset::match(Ld, DAG);
15382     LoadVT = Ld->getMemoryVT();
15383     // Load and store should be the same type.
15384     if (MemVT != LoadVT)
15385       return;
15386     // Loads must only have one use.
15387     if (!Ld->hasNUsesOfValue(1, 0))
15388       return;
15389     // The memory operands must not be volatile/indexed.
15390     if (Ld->isVolatile() || Ld->isIndexed())
15391       return;
15392   }
15393   auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
15394                             int64_t &Offset) -> bool {
15395     // The memory operands must not be volatile/indexed.
15396     if (Other->isVolatile() || Other->isIndexed())
15397       return false;
15398     // Don't mix temporal stores with non-temporal stores.
15399     if (St->isNonTemporal() != Other->isNonTemporal())
15400       return false;
15401     SDValue OtherBC = peekThroughBitcasts(Other->getValue());
15402     // Allow merging constants of different types as integers.
15403     bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
15404                                            : Other->getMemoryVT() != MemVT;
15405     if (IsLoadSrc) {
15406       if (NoTypeMatch)
15407         return false;
15408       // The Load's Base Ptr must also match
15409       if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
15410         BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
15411         if (LoadVT != OtherLd->getMemoryVT())
15412           return false;
15413         // Loads must only have one use.
15414         if (!OtherLd->hasNUsesOfValue(1, 0))
15415           return false;
15416         // The memory operands must not be volatile/indexed.
15417         if (OtherLd->isVolatile() || OtherLd->isIndexed())
15418           return false;
15419         // Don't mix temporal loads with non-temporal loads.
15420         if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
15421           return false;
15422         if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
15423           return false;
15424       } else
15425         return false;
15426     }
15427     if (IsConstantSrc) {
15428       if (NoTypeMatch)
15429         return false;
15430       if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
15431         return false;
15432     }
15433     if (IsExtractVecSrc) {
15434       // Do not merge truncated stores here.
15435       if (Other->isTruncatingStore())
15436         return false;
15437       if (!MemVT.bitsEq(OtherBC.getValueType()))
15438         return false;
15439       if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
15440           OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
15441         return false;
15442     }
15443     Ptr = BaseIndexOffset::match(Other, DAG);
15444     return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
15445   };
15446
15447   // Check if the pair of StoreNode and the RootNode already bail out many
15448   // times which is over the limit in dependence check.
15449   auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
15450                                         SDNode *RootNode) -> bool {
15451     auto RootCount = StoreRootCountMap.find(StoreNode);
15452     if (RootCount != StoreRootCountMap.end() &&
15453         RootCount->second.first == RootNode &&
15454         RootCount->second.second > StoreMergeDependenceLimit)
15455       return true;
15456     return false;
15457   };
15458
15459   // We looking for a root node which is an ancestor to all mergable
15460   // stores. We search up through a load, to our root and then down
15461   // through all children. For instance we will find Store{1,2,3} if
15462   // St is Store1, Store2. or Store3 where the root is not a load
15463   // which always true for nonvolatile ops. TODO: Expand
15464   // the search to find all valid candidates through multiple layers of loads.
15465   //
15466   // Root
15467   // |-------|-------|
15468   // Load    Load    Store3
15469   // |       |
15470   // Store1   Store2
15471   //
15472   // FIXME: We should be able to climb and
15473   // descend TokenFactors to find candidates as well.
15474
15475   RootNode = St->getChain().getNode();
15476
15477   unsigned NumNodesExplored = 0;
15478   if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
15479     RootNode = Ldn->getChain().getNode();
15480     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15481          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15482       if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
15483         for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
15484           if (I2.getOperandNo() == 0)
15485             if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
15486               BaseIndexOffset Ptr;
15487               int64_t PtrDiff;
15488               if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15489                   !OverLimitInDependenceCheck(OtherST, RootNode))
15490                 StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15491             }
15492   } else
15493     for (auto I = RootNode->use_begin(), E = RootNode->use_end();
15494          I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
15495       if (I.getOperandNo() == 0)
15496         if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
15497           BaseIndexOffset Ptr;
15498           int64_t PtrDiff;
15499           if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
15500               !OverLimitInDependenceCheck(OtherST, RootNode))
15501             StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
15502         }
15503 }
15504
15505 // We need to check that merging these stores does not cause a loop in
15506 // the DAG. Any store candidate may depend on another candidate
15507 // indirectly through its operand (we already consider dependencies
15508 // through the chain). Check in parallel by searching up from
15509 // non-chain operands of candidates.
15510 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
15511     SmallVectorImpl<MemOpLink> &StoreNodes, unsigned NumStores,
15512     SDNode *RootNode) {
15513   // FIXME: We should be able to truncate a full search of
15514   // predecessors by doing a BFS and keeping tabs the originating
15515   // stores from which worklist nodes come from in a similar way to
15516   // TokenFactor simplfication.
15517
15518   SmallPtrSet<const SDNode *, 32> Visited;
15519   SmallVector<const SDNode *, 8> Worklist;
15520
15521   // RootNode is a predecessor to all candidates so we need not search
15522   // past it. Add RootNode (peeking through TokenFactors). Do not count
15523   // these towards size check.
15524
15525   Worklist.push_back(RootNode);
15526   while (!Worklist.empty()) {
15527     auto N = Worklist.pop_back_val();
15528     if (!Visited.insert(N).second)
15529       continue; // Already present in Visited.
15530     if (N->getOpcode() == ISD::TokenFactor) {
15531       for (SDValue Op : N->ops())
15532         Worklist.push_back(Op.getNode());
15533     }
15534   }
15535
15536   // Don't count pruning nodes towards max.
15537   unsigned int Max = 1024 + Visited.size();
15538   // Search Ops of store candidates.
15539   for (unsigned i = 0; i < NumStores; ++i) {
15540     SDNode *N = StoreNodes[i].MemNode;
15541     // Of the 4 Store Operands:
15542     //   * Chain (Op 0) -> We have already considered these
15543     //                    in candidate selection and can be
15544     //                    safely ignored
15545     //   * Value (Op 1) -> Cycles may happen (e.g. through load chains)
15546     //   * Address (Op 2) -> Merged addresses may only vary by a fixed constant,
15547     //                       but aren't necessarily fromt the same base node, so
15548     //                       cycles possible (e.g. via indexed store).
15549     //   * (Op 3) -> Represents the pre or post-indexing offset (or undef for
15550     //               non-indexed stores). Not constant on all targets (e.g. ARM)
15551     //               and so can participate in a cycle.
15552     for (unsigned j = 1; j < N->getNumOperands(); ++j)
15553       Worklist.push_back(N->getOperand(j).getNode());
15554   }
15555   // Search through DAG. We can stop early if we find a store node.
15556   for (unsigned i = 0; i < NumStores; ++i)
15557     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
15558                                      Max)) {
15559       // If the searching bail out, record the StoreNode and RootNode in the
15560       // StoreRootCountMap. If we have seen the pair many times over a limit,
15561       // we won't add the StoreNode into StoreNodes set again.
15562       if (Visited.size() >= Max) {
15563         auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
15564         if (RootCount.first == RootNode)
15565           RootCount.second++;
15566         else
15567           RootCount = {RootNode, 1};
15568       }
15569       return false;
15570     }
15571   return true;
15572 }
15573
15574 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
15575   if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
15576     return false;
15577
15578   EVT MemVT = St->getMemoryVT();
15579   int64_t ElementSizeBytes = MemVT.getStoreSize();
15580   unsigned NumMemElts = MemVT.isVector() ? MemVT.getVectorNumElements() : 1;
15581
15582   if (MemVT.getSizeInBits() * 2 > MaximumLegalStoreInBits)
15583     return false;
15584
15585   bool NoVectors = DAG.getMachineFunction().getFunction().hasFnAttribute(
15586       Attribute::NoImplicitFloat);
15587
15588   // This function cannot currently deal with non-byte-sized memory sizes.
15589   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
15590     return false;
15591
15592   if (!MemVT.isSimple())
15593     return false;
15594
15595   // Perform an early exit check. Do not bother looking at stored values that
15596   // are not constants, loads, or extracted vector elements.
15597   SDValue StoredVal = peekThroughBitcasts(St->getValue());
15598   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
15599   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
15600                        isa<ConstantFPSDNode>(StoredVal);
15601   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
15602                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
15603   bool IsNonTemporalStore = St->isNonTemporal();
15604   bool IsNonTemporalLoad =
15605       IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
15606
15607   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
15608     return false;
15609
15610   SmallVector<MemOpLink, 8> StoreNodes;
15611   SDNode *RootNode;
15612   // Find potential store merge candidates by searching through chain sub-DAG
15613   getStoreMergeCandidates(St, StoreNodes, RootNode);
15614
15615   // Check if there is anything to merge.
15616   if (StoreNodes.size() < 2)
15617     return false;
15618
15619   // Sort the memory operands according to their distance from the
15620   // base pointer.
15621   llvm::sort(StoreNodes, [](MemOpLink LHS, MemOpLink RHS) {
15622     return LHS.OffsetFromBase < RHS.OffsetFromBase;
15623   });
15624
15625   // Store Merge attempts to merge the lowest stores. This generally
15626   // works out as if successful, as the remaining stores are checked
15627   // after the first collection of stores is merged. However, in the
15628   // case that a non-mergeable store is found first, e.g., {p[-2],
15629   // p[0], p[1], p[2], p[3]}, we would fail and miss the subsequent
15630   // mergeable cases. To prevent this, we prune such stores from the
15631   // front of StoreNodes here.
15632
15633   bool RV = false;
15634   while (StoreNodes.size() > 1) {
15635     size_t StartIdx = 0;
15636     while ((StartIdx + 1 < StoreNodes.size()) &&
15637            StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
15638                StoreNodes[StartIdx + 1].OffsetFromBase)
15639       ++StartIdx;
15640
15641     // Bail if we don't have enough candidates to merge.
15642     if (StartIdx + 1 >= StoreNodes.size())
15643       return RV;
15644
15645     if (StartIdx)
15646       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + StartIdx);
15647
15648     // Scan the memory operations on the chain and find the first
15649     // non-consecutive store memory address.
15650     unsigned NumConsecutiveStores = 1;
15651     int64_t StartAddress = StoreNodes[0].OffsetFromBase;
15652     // Check that the addresses are consecutive starting from the second
15653     // element in the list of stores.
15654     for (unsigned i = 1, e = StoreNodes.size(); i < e; ++i) {
15655       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
15656       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15657         break;
15658       NumConsecutiveStores = i + 1;
15659     }
15660
15661     if (NumConsecutiveStores < 2) {
15662       StoreNodes.erase(StoreNodes.begin(),
15663                        StoreNodes.begin() + NumConsecutiveStores);
15664       continue;
15665     }
15666
15667     // The node with the lowest store address.
15668     LLVMContext &Context = *DAG.getContext();
15669     const DataLayout &DL = DAG.getDataLayout();
15670
15671     // Store the constants into memory as one consecutive store.
15672     if (IsConstantSrc) {
15673       while (NumConsecutiveStores >= 2) {
15674         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15675         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15676         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15677         unsigned LastLegalType = 1;
15678         unsigned LastLegalVectorType = 1;
15679         bool LastIntegerTrunc = false;
15680         bool NonZero = false;
15681         unsigned FirstZeroAfterNonZero = NumConsecutiveStores;
15682         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15683           StoreSDNode *ST = cast<StoreSDNode>(StoreNodes[i].MemNode);
15684           SDValue StoredVal = ST->getValue();
15685           bool IsElementZero = false;
15686           if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal))
15687             IsElementZero = C->isNullValue();
15688           else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal))
15689             IsElementZero = C->getConstantFPValue()->isNullValue();
15690           if (IsElementZero) {
15691             if (NonZero && FirstZeroAfterNonZero == NumConsecutiveStores)
15692               FirstZeroAfterNonZero = i;
15693           }
15694           NonZero |= !IsElementZero;
15695
15696           // Find a legal type for the constant store.
15697           unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15698           EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15699           bool IsFast = false;
15700
15701           // Break early when size is too large to be legal.
15702           if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15703             break;
15704
15705           if (TLI.isTypeLegal(StoreTy) &&
15706               TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15707               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15708                                      *FirstInChain->getMemOperand(), &IsFast) &&
15709               IsFast) {
15710             LastIntegerTrunc = false;
15711             LastLegalType = i + 1;
15712             // Or check whether a truncstore is legal.
15713           } else if (TLI.getTypeAction(Context, StoreTy) ==
15714                      TargetLowering::TypePromoteInteger) {
15715             EVT LegalizedStoredValTy =
15716                 TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
15717             if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15718                 TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15719                 TLI.allowsMemoryAccess(Context, DL, StoreTy,
15720                                        *FirstInChain->getMemOperand(),
15721                                        &IsFast) &&
15722                 IsFast) {
15723               LastIntegerTrunc = true;
15724               LastLegalType = i + 1;
15725             }
15726           }
15727
15728           // We only use vectors if the constant is known to be zero or the
15729           // target allows it and the function is not marked with the
15730           // noimplicitfloat attribute.
15731           if ((!NonZero ||
15732                TLI.storeOfVectorConstantIsCheap(MemVT, i + 1, FirstStoreAS)) &&
15733               !NoVectors) {
15734             // Find a legal type for the vector store.
15735             unsigned Elts = (i + 1) * NumMemElts;
15736             EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15737             if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
15738                 TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15739                 TLI.allowsMemoryAccess(
15740                     Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
15741                 IsFast)
15742               LastLegalVectorType = i + 1;
15743           }
15744         }
15745
15746         bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
15747         unsigned NumElem = (UseVector) ? LastLegalVectorType : LastLegalType;
15748
15749         // Check if we found a legal integer type that creates a meaningful
15750         // merge.
15751         if (NumElem < 2) {
15752           // We know that candidate stores are in order and of correct
15753           // shape. While there is no mergeable sequence from the
15754           // beginning one may start later in the sequence. The only
15755           // reason a merge of size N could have failed where another of
15756           // the same size would not have, is if the alignment has
15757           // improved or we've dropped a non-zero value. Drop as many
15758           // candidates as we can here.
15759           unsigned NumSkip = 1;
15760           while (
15761               (NumSkip < NumConsecutiveStores) &&
15762               (NumSkip < FirstZeroAfterNonZero) &&
15763               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15764             NumSkip++;
15765
15766           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15767           NumConsecutiveStores -= NumSkip;
15768           continue;
15769         }
15770
15771         // Check that we can merge these candidates without causing a cycle.
15772         if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
15773                                                       RootNode)) {
15774           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15775           NumConsecutiveStores -= NumElem;
15776           continue;
15777         }
15778
15779         RV |= MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem, true,
15780                                               UseVector, LastIntegerTrunc);
15781
15782         // Remove merged stores for next iteration.
15783         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
15784         NumConsecutiveStores -= NumElem;
15785       }
15786       continue;
15787     }
15788
15789     // When extracting multiple vector elements, try to store them
15790     // in one vector store rather than a sequence of scalar stores.
15791     if (IsExtractVecSrc) {
15792       // Loop on Consecutive Stores on success.
15793       while (NumConsecutiveStores >= 2) {
15794         LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15795         unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15796         unsigned FirstStoreAlign = FirstInChain->getAlignment();
15797         unsigned NumStoresToMerge = 1;
15798         for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15799           // Find a legal type for the vector store.
15800           unsigned Elts = (i + 1) * NumMemElts;
15801           EVT Ty =
15802               EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
15803           bool IsFast;
15804
15805           // Break early when size is too large to be legal.
15806           if (Ty.getSizeInBits() > MaximumLegalStoreInBits)
15807             break;
15808
15809           if (TLI.isTypeLegal(Ty) &&
15810               TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
15811               TLI.allowsMemoryAccess(Context, DL, Ty,
15812                                      *FirstInChain->getMemOperand(), &IsFast) &&
15813               IsFast)
15814             NumStoresToMerge = i + 1;
15815         }
15816
15817         // Check if we found a legal integer type creating a meaningful
15818         // merge.
15819         if (NumStoresToMerge < 2) {
15820           // We know that candidate stores are in order and of correct
15821           // shape. While there is no mergeable sequence from the
15822           // beginning one may start later in the sequence. The only
15823           // reason a merge of size N could have failed where another of
15824           // the same size would not have, is if the alignment has
15825           // improved. Drop as many candidates as we can here.
15826           unsigned NumSkip = 1;
15827           while (
15828               (NumSkip < NumConsecutiveStores) &&
15829               (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
15830             NumSkip++;
15831
15832           StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
15833           NumConsecutiveStores -= NumSkip;
15834           continue;
15835         }
15836
15837         // Check that we can merge these candidates without causing a cycle.
15838         if (!checkMergeStoreCandidatesForDependencies(
15839                 StoreNodes, NumStoresToMerge, RootNode)) {
15840           StoreNodes.erase(StoreNodes.begin(),
15841                            StoreNodes.begin() + NumStoresToMerge);
15842           NumConsecutiveStores -= NumStoresToMerge;
15843           continue;
15844         }
15845
15846         RV |= MergeStoresOfConstantsOrVecElts(
15847             StoreNodes, MemVT, NumStoresToMerge, false, true, false);
15848
15849         StoreNodes.erase(StoreNodes.begin(),
15850                          StoreNodes.begin() + NumStoresToMerge);
15851         NumConsecutiveStores -= NumStoresToMerge;
15852       }
15853       continue;
15854     }
15855
15856     // Below we handle the case of multiple consecutive stores that
15857     // come from multiple consecutive loads. We merge them into a single
15858     // wide load and a single wide store.
15859
15860     // Look for load nodes which are used by the stored values.
15861     SmallVector<MemOpLink, 8> LoadNodes;
15862
15863     // Find acceptable loads. Loads need to have the same chain (token factor),
15864     // must not be zext, volatile, indexed, and they must be consecutive.
15865     BaseIndexOffset LdBasePtr;
15866
15867     for (unsigned i = 0; i < NumConsecutiveStores; ++i) {
15868       StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
15869       SDValue Val = peekThroughBitcasts(St->getValue());
15870       LoadSDNode *Ld = cast<LoadSDNode>(Val);
15871
15872       BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld, DAG);
15873       // If this is not the first ptr that we check.
15874       int64_t LdOffset = 0;
15875       if (LdBasePtr.getBase().getNode()) {
15876         // The base ptr must be the same.
15877         if (!LdBasePtr.equalBaseIndex(LdPtr, DAG, LdOffset))
15878           break;
15879       } else {
15880         // Check that all other base pointers are the same as this one.
15881         LdBasePtr = LdPtr;
15882       }
15883
15884       // We found a potential memory operand to merge.
15885       LoadNodes.push_back(MemOpLink(Ld, LdOffset));
15886     }
15887
15888     while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
15889       // If we have load/store pair instructions and we only have two values,
15890       // don't bother merging.
15891       unsigned RequiredAlignment;
15892       if (LoadNodes.size() == 2 &&
15893           TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
15894           StoreNodes[0].MemNode->getAlignment() >= RequiredAlignment) {
15895         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
15896         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
15897         break;
15898       }
15899       LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
15900       unsigned FirstStoreAS = FirstInChain->getAddressSpace();
15901       unsigned FirstStoreAlign = FirstInChain->getAlignment();
15902       LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
15903       unsigned FirstLoadAlign = FirstLoad->getAlignment();
15904
15905       // Scan the memory operations on the chain and find the first
15906       // non-consecutive load memory address. These variables hold the index in
15907       // the store node array.
15908
15909       unsigned LastConsecutiveLoad = 1;
15910
15911       // This variable refers to the size and not index in the array.
15912       unsigned LastLegalVectorType = 1;
15913       unsigned LastLegalIntegerType = 1;
15914       bool isDereferenceable = true;
15915       bool DoIntegerTruncate = false;
15916       StartAddress = LoadNodes[0].OffsetFromBase;
15917       SDValue FirstChain = FirstLoad->getChain();
15918       for (unsigned i = 1; i < LoadNodes.size(); ++i) {
15919         // All loads must share the same chain.
15920         if (LoadNodes[i].MemNode->getChain() != FirstChain)
15921           break;
15922
15923         int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
15924         if (CurrAddress - StartAddress != (ElementSizeBytes * i))
15925           break;
15926         LastConsecutiveLoad = i;
15927
15928         if (isDereferenceable && !LoadNodes[i].MemNode->isDereferenceable())
15929           isDereferenceable = false;
15930
15931         // Find a legal type for the vector store.
15932         unsigned Elts = (i + 1) * NumMemElts;
15933         EVT StoreTy = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
15934
15935         // Break early when size is too large to be legal.
15936         if (StoreTy.getSizeInBits() > MaximumLegalStoreInBits)
15937           break;
15938
15939         bool IsFastSt, IsFastLd;
15940         if (TLI.isTypeLegal(StoreTy) &&
15941             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15942             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15943                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15944             IsFastSt &&
15945             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15946                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15947             IsFastLd) {
15948           LastLegalVectorType = i + 1;
15949         }
15950
15951         // Find a legal type for the integer store.
15952         unsigned SizeInBits = (i + 1) * ElementSizeBytes * 8;
15953         StoreTy = EVT::getIntegerVT(Context, SizeInBits);
15954         if (TLI.isTypeLegal(StoreTy) &&
15955             TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
15956             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15957                                    *FirstInChain->getMemOperand(), &IsFastSt) &&
15958             IsFastSt &&
15959             TLI.allowsMemoryAccess(Context, DL, StoreTy,
15960                                    *FirstLoad->getMemOperand(), &IsFastLd) &&
15961             IsFastLd) {
15962           LastLegalIntegerType = i + 1;
15963           DoIntegerTruncate = false;
15964           // Or check whether a truncstore and extload is legal.
15965         } else if (TLI.getTypeAction(Context, StoreTy) ==
15966                    TargetLowering::TypePromoteInteger) {
15967           EVT LegalizedStoredValTy = TLI.getTypeToTransformTo(Context, StoreTy);
15968           if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
15969               TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
15970               TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValTy,
15971                                  StoreTy) &&
15972               TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
15973                                  StoreTy) &&
15974               TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
15975               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15976                                      *FirstInChain->getMemOperand(),
15977                                      &IsFastSt) &&
15978               IsFastSt &&
15979               TLI.allowsMemoryAccess(Context, DL, StoreTy,
15980                                      *FirstLoad->getMemOperand(), &IsFastLd) &&
15981               IsFastLd) {
15982             LastLegalIntegerType = i + 1;
15983             DoIntegerTruncate = true;
15984           }
15985         }
15986       }
15987
15988       // Only use vector types if the vector type is larger than the integer
15989       // type. If they are the same, use integers.
15990       bool UseVectorTy =
15991           LastLegalVectorType > LastLegalIntegerType && !NoVectors;
15992       unsigned LastLegalType =
15993           std::max(LastLegalVectorType, LastLegalIntegerType);
15994
15995       // We add +1 here because the LastXXX variables refer to location while
15996       // the NumElem refers to array/index size.
15997       unsigned NumElem =
15998           std::min(NumConsecutiveStores, LastConsecutiveLoad + 1);
15999       NumElem = std::min(LastLegalType, NumElem);
16000
16001       if (NumElem < 2) {
16002         // We know that candidate stores are in order and of correct
16003         // shape. While there is no mergeable sequence from the
16004         // beginning one may start later in the sequence. The only
16005         // reason a merge of size N could have failed where another of
16006         // the same size would not have is if the alignment or either
16007         // the load or store has improved. Drop as many candidates as we
16008         // can here.
16009         unsigned NumSkip = 1;
16010         while ((NumSkip < LoadNodes.size()) &&
16011                (LoadNodes[NumSkip].MemNode->getAlignment() <= FirstLoadAlign) &&
16012                (StoreNodes[NumSkip].MemNode->getAlignment() <= FirstStoreAlign))
16013           NumSkip++;
16014         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumSkip);
16015         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumSkip);
16016         NumConsecutiveStores -= NumSkip;
16017         continue;
16018       }
16019
16020       // Check that we can merge these candidates without causing a cycle.
16021       if (!checkMergeStoreCandidatesForDependencies(StoreNodes, NumElem,
16022                                                     RootNode)) {
16023         StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16024         LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16025         NumConsecutiveStores -= NumElem;
16026         continue;
16027       }
16028
16029       // Find if it is better to use vectors or integers to load and store
16030       // to memory.
16031       EVT JointMemOpVT;
16032       if (UseVectorTy) {
16033         // Find a legal type for the vector store.
16034         unsigned Elts = NumElem * NumMemElts;
16035         JointMemOpVT = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
16036       } else {
16037         unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
16038         JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
16039       }
16040
16041       SDLoc LoadDL(LoadNodes[0].MemNode);
16042       SDLoc StoreDL(StoreNodes[0].MemNode);
16043
16044       // The merged loads are required to have the same incoming chain, so
16045       // using the first's chain is acceptable.
16046
16047       SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
16048       AddToWorklist(NewStoreChain.getNode());
16049
16050       MachineMemOperand::Flags LdMMOFlags =
16051           isDereferenceable ? MachineMemOperand::MODereferenceable
16052                             : MachineMemOperand::MONone;
16053       if (IsNonTemporalLoad)
16054         LdMMOFlags |= MachineMemOperand::MONonTemporal;
16055
16056       MachineMemOperand::Flags StMMOFlags =
16057           IsNonTemporalStore ? MachineMemOperand::MONonTemporal
16058                              : MachineMemOperand::MONone;
16059
16060       SDValue NewLoad, NewStore;
16061       if (UseVectorTy || !DoIntegerTruncate) {
16062         NewLoad =
16063             DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
16064                         FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
16065                         FirstLoadAlign, LdMMOFlags);
16066         NewStore = DAG.getStore(
16067             NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
16068             FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
16069       } else { // This must be the truncstore/extload case
16070         EVT ExtendedTy =
16071             TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
16072         NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
16073                                  FirstLoad->getChain(), FirstLoad->getBasePtr(),
16074                                  FirstLoad->getPointerInfo(), JointMemOpVT,
16075                                  FirstLoadAlign, LdMMOFlags);
16076         NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
16077                                      FirstInChain->getBasePtr(),
16078                                      FirstInChain->getPointerInfo(),
16079                                      JointMemOpVT, FirstInChain->getAlignment(),
16080                                      FirstInChain->getMemOperand()->getFlags());
16081       }
16082
16083       // Transfer chain users from old loads to the new load.
16084       for (unsigned i = 0; i < NumElem; ++i) {
16085         LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
16086         DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
16087                                       SDValue(NewLoad.getNode(), 1));
16088       }
16089
16090       // Replace the all stores with the new store. Recursively remove
16091       // corresponding value if its no longer used.
16092       for (unsigned i = 0; i < NumElem; ++i) {
16093         SDValue Val = StoreNodes[i].MemNode->getOperand(1);
16094         CombineTo(StoreNodes[i].MemNode, NewStore);
16095         if (Val.getNode()->use_empty())
16096           recursivelyDeleteUnusedNodes(Val.getNode());
16097       }
16098
16099       RV = true;
16100       StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + NumElem);
16101       LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + NumElem);
16102       NumConsecutiveStores -= NumElem;
16103     }
16104   }
16105   return RV;
16106 }
16107
16108 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
16109   SDLoc SL(ST);
16110   SDValue ReplStore;
16111
16112   // Replace the chain to avoid dependency.
16113   if (ST->isTruncatingStore()) {
16114     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
16115                                   ST->getBasePtr(), ST->getMemoryVT(),
16116                                   ST->getMemOperand());
16117   } else {
16118     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
16119                              ST->getMemOperand());
16120   }
16121
16122   // Create token to keep both nodes around.
16123   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
16124                               MVT::Other, ST->getChain(), ReplStore);
16125
16126   // Make sure the new and old chains are cleaned up.
16127   AddToWorklist(Token.getNode());
16128
16129   // Don't add users to work list.
16130   return CombineTo(ST, Token, false);
16131 }
16132
16133 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
16134   SDValue Value = ST->getValue();
16135   if (Value.getOpcode() == ISD::TargetConstantFP)
16136     return SDValue();
16137
16138   SDLoc DL(ST);
16139
16140   SDValue Chain = ST->getChain();
16141   SDValue Ptr = ST->getBasePtr();
16142
16143   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
16144
16145   // NOTE: If the original store is volatile, this transform must not increase
16146   // the number of stores.  For example, on x86-32 an f64 can be stored in one
16147   // processor operation but an i64 (which is not legal) requires two.  So the
16148   // transform should not be done in this case.
16149
16150   SDValue Tmp;
16151   switch (CFP->getSimpleValueType(0).SimpleTy) {
16152   default:
16153     llvm_unreachable("Unknown FP type");
16154   case MVT::f16:    // We don't do this for these yet.
16155   case MVT::f80:
16156   case MVT::f128:
16157   case MVT::ppcf128:
16158     return SDValue();
16159   case MVT::f32:
16160     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
16161         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16162       ;
16163       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
16164                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
16165                             MVT::i32);
16166       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
16167     }
16168
16169     return SDValue();
16170   case MVT::f64:
16171     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
16172          !ST->isVolatile()) ||
16173         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
16174       ;
16175       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
16176                             getZExtValue(), SDLoc(CFP), MVT::i64);
16177       return DAG.getStore(Chain, DL, Tmp,
16178                           Ptr, ST->getMemOperand());
16179     }
16180
16181     if (!ST->isVolatile() &&
16182         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
16183       // Many FP stores are not made apparent until after legalize, e.g. for
16184       // argument passing.  Since this is so common, custom legalize the
16185       // 64-bit integer store into two 32-bit stores.
16186       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
16187       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
16188       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
16189       if (DAG.getDataLayout().isBigEndian())
16190         std::swap(Lo, Hi);
16191
16192       unsigned Alignment = ST->getAlignment();
16193       MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16194       AAMDNodes AAInfo = ST->getAAInfo();
16195
16196       SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16197                                  ST->getAlignment(), MMOFlags, AAInfo);
16198       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16199                         DAG.getConstant(4, DL, Ptr.getValueType()));
16200       Alignment = MinAlign(Alignment, 4U);
16201       SDValue St1 = DAG.getStore(Chain, DL, Hi, Ptr,
16202                                  ST->getPointerInfo().getWithOffset(4),
16203                                  Alignment, MMOFlags, AAInfo);
16204       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
16205                          St0, St1);
16206     }
16207
16208     return SDValue();
16209   }
16210 }
16211
/// Combine a STORE node.
///
/// The transforms below are attempted in the order written and the first one
/// that applies returns. They cover: storing the source of a bitcast
/// directly, dropping stores of undef, refining the recorded alignment,
/// FP load/store pair and STORE (BSWAP) rewrites, finding a better chain,
/// simplifying the input of a truncating store, removing stores that are
/// dead or redundant with respect to a preceding load or store, folding
/// FP_ROUND/TRUNCATE into a truncating store, merging consecutive stores,
/// forming indexed stores, replacing stores of FP constants, and splitting
/// merged-value stores. If none of these fire, the result of
/// ReduceLoadOpStoreWidth is returned.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    // If the store is volatile, we only want to change the store type if the
    // resulting store is legal. Otherwise we might increase the number of
    // memory accesses. We don't care if the original type was legal or not
    // as we assume software couldn't rely on the number of accesses of an
    // illegal type.
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegal(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
                                     DAG, *ST->getMemOperand())) {
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
                          ST->getPointerInfo(), ST->getAlignment(),
                          ST->getMemOperand()->getFlags(), ST->getAAInfo());
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment() && ST->getSrcValueOffset() % Align == 0) {
        SDValue NewStore =
            DAG.getTruncStore(Chain, SDLoc(N), Value, Ptr, ST->getPointerInfo(),
                              ST->getMemoryVT(), Align,
                              ST->getMemOperand()->getFlags(), ST->getAAInfo());
        // NewStore will always be N as we are only refining the alignment
        assert(NewStore.getNode() == N);
        // NewStore is otherwise only referenced by the assert above.
        (void)NewStore;
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  // Try transforming several stores into STORE (BSWAP).
  if (SDValue Store = MatchStoreCombine(ST))
    return Store;

  if (ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    // Re-read the chain from the store before the transforms below use it.
    Chain = ST->getChain();
  }

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger() &&
      (!isa<ConstantSDNode>(Value) ||
       !cast<ConstantSDNode>(Value)->isOpaque())) {
    // Only the low bits that survive the truncation are demanded.
    APInt TruncDemandedBits =
        APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
                             ST->getMemoryVT().getScalarSizeInBits());

    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    AddToWorklist(Value.getNode());
    if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
                               ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
      // Re-visit the store if anything changed and the store hasn't been merged
      // with another node (N is deleted) SimplifyDemandedBits will add Value's
      // node back to the worklist if necessary, but we also need to re-visit
      // the Store node itself.
      if (N->getOpcode() != ISD::DELETED_NODE)
        AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // The remaining store-after-store folds only apply when the chain operand is
  // itself a store and neither store is indexed or volatile.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
        !ST1->isVolatile()) {
      if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
          ST->getMemoryVT() == ST1->getMemoryVT()) {
        // If this is a store followed by a store with the same value to the
        // same location, then the store is dead/noop.
        return Chain;
      }

      if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
          !ST1->getBasePtr().isUndef()) {
        const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
        const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
        unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
        unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
        // If this is a store who's preceding store to a subset of the current
        // location and no one other node is chained to that store we can
        // effectively drop the store. Do not remove stores to undef as they may
        // be used as data sinks.
        if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
          // The preceding store ST1 is replaced by its own input chain; an
          // empty SDValue is returned for ST itself.
          CombineTo(ST1, ST1->getChain());
          return SDValue();
        }

        // If ST stores to a subset of preceding store's write set, we may be
        // able to fold ST's value into the preceding stored value. As we know
        // the other uses of ST1's chain are unconcerned with ST, this folding
        // will not affect those nodes.
        int64_t BitOffset;
        if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
                               BitOffset)) {
          SDValue ChainValue = ST1->getValue();
          // Only handled when both stored values are integer constants.
          if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
            if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
              APInt Val = C1->getAPIntValue();
              APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
              // FIXME: Handle Big-endian mode.
              if (!DAG.getDataLayout().isBigEndian()) {
                // Splice ST's constant into ST1's constant at BitOffset and
                // update ST1 in place; ST is then replaced by the updated ST1.
                Val.insertBits(InsertVal, BitOffset);
                SDValue NewSDVal =
                    DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
                                    C1->isTargetOpcode(), C1->isOpaque());
                SDNode *NewST1 = DAG.UpdateNodeOperands(
                    ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
                    ST1->getOperand(3));
                return CombineTo(ST, SDValue(NewST1, 0));
              }
            }
          }
        } // End ST subset of ST1 case.
      }
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Always perform this optimization before types are legal. If the target
  // prefers, also try this after legalization to catch stores that were created
  // by intrinsics or other nodes.
  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
    while (true) {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      if (!Changed) break;
      // Return N as merge only uses CombineTo and no worklist clean
      // up is necessary.
      if (N->getOpcode() == ISD::DELETED_NODE || !isa<StoreSDNode>(N))
        return SDValue(N, 0);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(ST->getValue())) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  if (SDValue NewSt = splitMergedValStore(ST))
    return NewSt;

  return ReduceLoadOpStoreWidth(N);
}
16421
16422 SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
16423   const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
16424   if (!LifetimeEnd->hasOffset())
16425     return SDValue();
16426
16427   const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
16428                                         LifetimeEnd->getOffset(), false);
16429
16430   // We walk up the chains to find stores.
16431   SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
16432   while (!Chains.empty()) {
16433     SDValue Chain = Chains.back();
16434     Chains.pop_back();
16435     if (!Chain.hasOneUse())
16436       continue;
16437     switch (Chain.getOpcode()) {
16438     case ISD::TokenFactor:
16439       for (unsigned Nops = Chain.getNumOperands(); Nops;)
16440         Chains.push_back(Chain.getOperand(--Nops));
16441       break;
16442     case ISD::LIFETIME_START:
16443     case ISD::LIFETIME_END:
16444       // We can forward past any lifetime start/end that can be proven not to
16445       // alias the node.
16446       if (!isAlias(Chain.getNode(), N))
16447         Chains.push_back(Chain.getOperand(0));
16448       break;
16449     case ISD::STORE: {
16450       StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
16451       if (ST->isVolatile() || ST->isIndexed())
16452         continue;
16453       const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
16454       // If we store purely within object bounds just before its lifetime ends,
16455       // we can remove the store.
16456       if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
16457                                    ST->getMemoryVT().getStoreSizeInBits())) {
16458         LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
16459                    dbgs() << "\nwithin LIFETIME_END of : ";
16460                    LifetimeEndBase.dump(); dbgs() << "\n");
16461         CombineTo(ST, ST->getChain());
16462         return SDValue(N, 0);
16463       }
16464     }
16465     }
16466   }
16467   return SDValue();
16468 }
16469
16470 /// For the instruction sequence of store below, F and I values
16471 /// are bundled together as an i64 value before being stored into memory.
16472 /// Sometimes it is more efficent to generate separate stores for F and I,
16473 /// which can remove the bitwise instructions or sink them to colder places.
16474 ///
16475 ///   (store (or (zext (bitcast F to i32) to i64),
16476 ///              (shl (zext I to i64), 32)), addr)  -->
16477 ///   (store F, addr) and (store I, addr+4)
16478 ///
16479 /// Similarly, splitting for other merged store can also be beneficial, like:
16480 /// For pair of {i32, i32}, i64 store --> two i32 stores.
16481 /// For pair of {i32, i16}, i64 store --> two i32 stores.
16482 /// For pair of {i16, i16}, i32 store --> two i16 stores.
16483 /// For pair of {i16, i8},  i32 store --> two i16 stores.
16484 /// For pair of {i8, i8},   i16 store --> two i8 stores.
16485 ///
16486 /// We allow each target to determine specifically which kind of splitting is
16487 /// supported.
16488 ///
16489 /// The store patterns are commonly seen from the simple code snippet below
16490 /// if only std::make_pair(...) is sroa transformed before inlined into hoo.
16491 ///   void goo(const std::pair<int, float> &);
16492 ///   hoo() {
16493 ///     ...
16494 ///     goo(std::make_pair(tmp, ftmp));
16495 ///     ...
16496 ///   }
16497 ///
16498 SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
16499   if (OptLevel == CodeGenOpt::None)
16500     return SDValue();
16501
16502   SDValue Val = ST->getValue();
16503   SDLoc DL(ST);
16504
16505   // Match OR operand.
16506   if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
16507     return SDValue();
16508
16509   // Match SHL operand and get Lower and Higher parts of Val.
16510   SDValue Op1 = Val.getOperand(0);
16511   SDValue Op2 = Val.getOperand(1);
16512   SDValue Lo, Hi;
16513   if (Op1.getOpcode() != ISD::SHL) {
16514     std::swap(Op1, Op2);
16515     if (Op1.getOpcode() != ISD::SHL)
16516       return SDValue();
16517   }
16518   Lo = Op2;
16519   Hi = Op1.getOperand(0);
16520   if (!Op1.hasOneUse())
16521     return SDValue();
16522
16523   // Match shift amount to HalfValBitSize.
16524   unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
16525   ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
16526   if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
16527     return SDValue();
16528
16529   // Lo and Hi are zero-extended from int with size less equal than 32
16530   // to i64.
16531   if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
16532       !Lo.getOperand(0).getValueType().isScalarInteger() ||
16533       Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
16534       Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
16535       !Hi.getOperand(0).getValueType().isScalarInteger() ||
16536       Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
16537     return SDValue();
16538
16539   // Use the EVT of low and high parts before bitcast as the input
16540   // of target query.
16541   EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
16542                   ? Lo.getOperand(0).getValueType()
16543                   : Lo.getValueType();
16544   EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
16545                    ? Hi.getOperand(0).getValueType()
16546                    : Hi.getValueType();
16547   if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
16548     return SDValue();
16549
16550   // Start to split store.
16551   unsigned Alignment = ST->getAlignment();
16552   MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
16553   AAMDNodes AAInfo = ST->getAAInfo();
16554
16555   // Change the sizes of Lo and Hi's value types to HalfValBitSize.
16556   EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
16557   Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
16558   Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
16559
16560   SDValue Chain = ST->getChain();
16561   SDValue Ptr = ST->getBasePtr();
16562   // Lower value store.
16563   SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
16564                              ST->getAlignment(), MMOFlags, AAInfo);
16565   Ptr =
16566       DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
16567                   DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
16568   // Higher value store.
16569   SDValue St1 =
16570       DAG.getStore(St0, DL, Hi, Ptr,
16571                    ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
16572                    Alignment / 2, MMOFlags, AAInfo);
16573   return St1;
16574 }
16575
16576 /// Convert a disguised subvector insertion into a shuffle:
16577 SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
16578   SDValue InsertVal = N->getOperand(1);
16579   SDValue Vec = N->getOperand(0);
16580
16581   // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
16582   //   --> (vector_shuffle X, Y)
16583   if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
16584       InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16585       isa<ConstantSDNode>(InsertVal.getOperand(1))) {
16586     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
16587     ArrayRef<int> Mask = SVN->getMask();
16588
16589     SDValue X = Vec.getOperand(0);
16590     SDValue Y = Vec.getOperand(1);
16591
16592     // Vec's operand 0 is using indices from 0 to N-1 and
16593     // operand 1 from N to 2N - 1, where N is the number of
16594     // elements in the vectors.
16595     int XOffset = -1;
16596     if (InsertVal.getOperand(0) == X) {
16597       XOffset = 0;
16598     } else if (InsertVal.getOperand(0) == Y) {
16599       XOffset = X.getValueType().getVectorNumElements();
16600     }
16601
16602     if (XOffset != -1) {
16603       SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
16604
16605       auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
16606       NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
16607       assert(NewMask[InsIndex] <
16608                  (int)(2 * Vec.getValueType().getVectorNumElements()) &&
16609              NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bound");
16610
16611       SDValue LegalShuffle =
16612               TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
16613                                           Y, NewMask, DAG);
16614       if (LegalShuffle)
16615         return LegalShuffle;
16616     }
16617   }
16618
16619   // insert_vector_elt V, (bitcast X from vector type), IdxC -->
16620   // bitcast(shuffle (bitcast V), (extended X), Mask)
16621   // Note: We do not use an insert_subvector node because that requires a
16622   // legal subvector type.
16623   if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
16624       !InsertVal.getOperand(0).getValueType().isVector())
16625     return SDValue();
16626
16627   SDValue SubVec = InsertVal.getOperand(0);
16628   SDValue DestVec = N->getOperand(0);
16629   EVT SubVecVT = SubVec.getValueType();
16630   EVT VT = DestVec.getValueType();
16631   unsigned NumSrcElts = SubVecVT.getVectorNumElements();
16632   unsigned ExtendRatio = VT.getSizeInBits() / SubVecVT.getSizeInBits();
16633   unsigned NumMaskVals = ExtendRatio * NumSrcElts;
16634
16635   // Step 1: Create a shuffle mask that implements this insert operation. The
16636   // vector that we are inserting into will be operand 0 of the shuffle, so
16637   // those elements are just 'i'. The inserted subvector is in the first
16638   // positions of operand 1 of the shuffle. Example:
16639   // insert v4i32 V, (v2i16 X), 2 --> shuffle v8i16 V', X', {0,1,2,3,8,9,6,7}
16640   SmallVector<int, 16> Mask(NumMaskVals);
16641   for (unsigned i = 0; i != NumMaskVals; ++i) {
16642     if (i / NumSrcElts == InsIndex)
16643       Mask[i] = (i % NumSrcElts) + NumMaskVals;
16644     else
16645       Mask[i] = i;
16646   }
16647
16648   // Bail out if the target can not handle the shuffle we want to create.
16649   EVT SubVecEltVT = SubVecVT.getVectorElementType();
16650   EVT ShufVT = EVT::getVectorVT(*DAG.getContext(), SubVecEltVT, NumMaskVals);
16651   if (!TLI.isShuffleMaskLegal(Mask, ShufVT))
16652     return SDValue();
16653
16654   // Step 2: Create a wide vector from the inserted source vector by appending
16655   // undefined elements. This is the same size as our destination vector.
16656   SDLoc DL(N);
16657   SmallVector<SDValue, 8> ConcatOps(ExtendRatio, DAG.getUNDEF(SubVecVT));
16658   ConcatOps[0] = SubVec;
16659   SDValue PaddedSubV = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShufVT, ConcatOps);
16660
16661   // Step 3: Shuffle in the padded subvector.
16662   SDValue DestVecBC = DAG.getBitcast(ShufVT, DestVec);
16663   SDValue Shuf = DAG.getVectorShuffle(ShufVT, DL, DestVecBC, PaddedSubV, Mask);
16664   AddToWorklist(PaddedSubV.getNode());
16665   AddToWorklist(DestVecBC.getNode());
16666   AddToWorklist(Shuf.getNode());
16667   return DAG.getBitcast(VT, Shuf);
16668 }
16669
16670 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
16671   SDValue InVec = N->getOperand(0);
16672   SDValue InVal = N->getOperand(1);
16673   SDValue EltNo = N->getOperand(2);
16674   SDLoc DL(N);
16675
16676   // If the inserted element is an UNDEF, just use the input vector.
16677   if (InVal.isUndef())
16678     return InVec;
16679
16680   EVT VT = InVec.getValueType();
16681   unsigned NumElts = VT.getVectorNumElements();
16682
16683   // Remove redundant insertions:
16684   // (insert_vector_elt x (extract_vector_elt x idx) idx) -> x
16685   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
16686       InVec == InVal.getOperand(0) && EltNo == InVal.getOperand(1))
16687     return InVec;
16688
16689   auto *IndexC = dyn_cast<ConstantSDNode>(EltNo);
16690   if (!IndexC) {
16691     // If this is variable insert to undef vector, it might be better to splat:
16692     // inselt undef, InVal, EltNo --> build_vector < InVal, InVal, ... >
16693     if (InVec.isUndef() && TLI.shouldSplatInsEltVarIndex(VT)) {
16694       SmallVector<SDValue, 8> Ops(NumElts, InVal);
16695       return DAG.getBuildVector(VT, DL, Ops);
16696     }
16697     return SDValue();
16698   }
16699
16700   // We must know which element is being inserted for folds below here.
16701   unsigned Elt = IndexC->getZExtValue();
16702   if (SDValue Shuf = combineInsertEltToShuffle(N, Elt))
16703     return Shuf;
16704
16705   // Canonicalize insert_vector_elt dag nodes.
16706   // Example:
16707   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
16708   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
16709   //
16710   // Do this only if the child insert_vector node has one use; also
16711   // do this only if indices are both constants and Idx1 < Idx0.
16712   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
16713       && isa<ConstantSDNode>(InVec.getOperand(2))) {
16714     unsigned OtherElt = InVec.getConstantOperandVal(2);
16715     if (Elt < OtherElt) {
16716       // Swap nodes.
16717       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
16718                                   InVec.getOperand(0), InVal, EltNo);
16719       AddToWorklist(NewOp.getNode());
16720       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
16721                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
16722     }
16723   }
16724
16725   // If we can't generate a legal BUILD_VECTOR, exit
16726   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
16727     return SDValue();
16728
16729   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
16730   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
16731   // vector elements.
16732   SmallVector<SDValue, 8> Ops;
16733   // Do not combine these two vectors if the output vector will not replace
16734   // the input vector.
16735   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
16736     Ops.append(InVec.getNode()->op_begin(),
16737                InVec.getNode()->op_end());
16738   } else if (InVec.isUndef()) {
16739     Ops.append(NumElts, DAG.getUNDEF(InVal.getValueType()));
16740   } else {
16741     return SDValue();
16742   }
16743   assert(Ops.size() == NumElts && "Unexpected vector size");
16744
16745   // Insert the element
16746   if (Elt < Ops.size()) {
16747     // All the operands of BUILD_VECTOR must have the same type;
16748     // we enforce that here.
16749     EVT OpVT = Ops[0].getValueType();
16750     Ops[Elt] = OpVT.isInteger() ? DAG.getAnyExtOrTrunc(InVal, DL, OpVT) : InVal;
16751   }
16752
16753   // Return the new vector
16754   return DAG.getBuildVector(VT, DL, Ops);
16755 }
16756
16757 SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
16758                                                   SDValue EltNo,
16759                                                   LoadSDNode *OriginalLoad) {
16760   assert(!OriginalLoad->isVolatile());
16761
16762   EVT ResultVT = EVE->getValueType(0);
16763   EVT VecEltVT = InVecVT.getVectorElementType();
16764   unsigned Align = OriginalLoad->getAlignment();
16765   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
16766       VecEltVT.getTypeForEVT(*DAG.getContext()));
16767
16768   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
16769     return SDValue();
16770
16771   ISD::LoadExtType ExtTy = ResultVT.bitsGT(VecEltVT) ?
16772     ISD::NON_EXTLOAD : ISD::EXTLOAD;
16773   if (!TLI.shouldReduceLoadWidth(OriginalLoad, ExtTy, VecEltVT))
16774     return SDValue();
16775
16776   Align = NewAlign;
16777
16778   SDValue NewPtr = OriginalLoad->getBasePtr();
16779   SDValue Offset;
16780   EVT PtrType = NewPtr.getValueType();
16781   MachinePointerInfo MPI;
16782   SDLoc DL(EVE);
16783   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
16784     int Elt = ConstEltNo->getZExtValue();
16785     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
16786     Offset = DAG.getConstant(PtrOff, DL, PtrType);
16787     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
16788   } else {
16789     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
16790     Offset = DAG.getNode(
16791         ISD::MUL, DL, PtrType, Offset,
16792         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
16793     // Discard the pointer info except the address space because the memory
16794     // operand can't represent this new access since the offset is variable.
16795     MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
16796   }
16797   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
16798
16799   // The replacement we need to do here is a little tricky: we need to
16800   // replace an extractelement of a load with a load.
16801   // Use ReplaceAllUsesOfValuesWith to do the replacement.
16802   // Note that this replacement assumes that the extractvalue is the only
16803   // use of the load; that's okay because we don't want to perform this
16804   // transformation in other cases anyway.
16805   SDValue Load;
16806   SDValue Chain;
16807   if (ResultVT.bitsGT(VecEltVT)) {
16808     // If the result type of vextract is wider than the load, then issue an
16809     // extending load instead.
16810     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
16811                                                   VecEltVT)
16812                                    ? ISD::ZEXTLOAD
16813                                    : ISD::EXTLOAD;
16814     Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT,
16815                           OriginalLoad->getChain(), NewPtr, MPI, VecEltVT,
16816                           Align, OriginalLoad->getMemOperand()->getFlags(),
16817                           OriginalLoad->getAAInfo());
16818     Chain = Load.getValue(1);
16819   } else {
16820     Load = DAG.getLoad(VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr,
16821                        MPI, Align, OriginalLoad->getMemOperand()->getFlags(),
16822                        OriginalLoad->getAAInfo());
16823     Chain = Load.getValue(1);
16824     if (ResultVT.bitsLT(VecEltVT))
16825       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
16826     else
16827       Load = DAG.getBitcast(ResultVT, Load);
16828   }
16829   WorklistRemover DeadNodes(*this);
16830   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
16831   SDValue To[] = { Load, Chain };
16832   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
16833   // Since we're explicitly calling ReplaceAllUses, add the new node to the
16834   // worklist explicitly as well.
16835   AddToWorklist(Load.getNode());
16836   AddUsersToWorklist(Load.getNode()); // Add users too
16837   // Make sure to revisit this node to clean it up; it will usually be dead.
16838   AddToWorklist(EVE);
16839   ++OpsNarrowed;
16840   return SDValue(EVE, 0);
16841 }
16842
16843 /// Transform a vector binary operation into a scalar binary operation by moving
16844 /// the math/logic after an extract element of a vector.
16845 static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
16846                                        bool LegalOperations) {
16847   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
16848   SDValue Vec = ExtElt->getOperand(0);
16849   SDValue Index = ExtElt->getOperand(1);
16850   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16851   if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
16852       Vec.getNode()->getNumValues() != 1)
16853     return SDValue();
16854
16855   // Targets may want to avoid this to prevent an expensive register transfer.
16856   if (!TLI.shouldScalarizeBinop(Vec))
16857     return SDValue();
16858
16859   // Extracting an element of a vector constant is constant-folded, so this
16860   // transform is just replacing a vector op with a scalar op while moving the
16861   // extract.
16862   SDValue Op0 = Vec.getOperand(0);
16863   SDValue Op1 = Vec.getOperand(1);
16864   if (isAnyConstantBuildVector(Op0, true) ||
16865       isAnyConstantBuildVector(Op1, true)) {
16866     // extractelt (binop X, C), IndexC --> binop (extractelt X, IndexC), C'
16867     // extractelt (binop C, X), IndexC --> binop C', (extractelt X, IndexC)
16868     SDLoc DL(ExtElt);
16869     EVT VT = ExtElt->getValueType(0);
16870     SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op0, Index);
16871     SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op1, Index);
16872     return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1);
16873   }
16874
16875   return SDValue();
16876 }
16877
16878 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
16879   SDValue VecOp = N->getOperand(0);
16880   SDValue Index = N->getOperand(1);
16881   EVT ScalarVT = N->getValueType(0);
16882   EVT VecVT = VecOp.getValueType();
16883   if (VecOp.isUndef())
16884     return DAG.getUNDEF(ScalarVT);
16885
16886   // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
16887   //
16888   // This only really matters if the index is non-constant since other combines
16889   // on the constant elements already work.
16890   SDLoc DL(N);
16891   if (VecOp.getOpcode() == ISD::INSERT_VECTOR_ELT &&
16892       Index == VecOp.getOperand(2)) {
16893     SDValue Elt = VecOp.getOperand(1);
16894     return VecVT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, DL, ScalarVT) : Elt;
16895   }
16896
16897   // (vextract (scalar_to_vector val, 0) -> val
16898   if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16899     // Check if the result type doesn't match the inserted element type. A
16900     // SCALAR_TO_VECTOR may truncate the inserted element and the
16901     // EXTRACT_VECTOR_ELT may widen the extracted vector.
16902     SDValue InOp = VecOp.getOperand(0);
16903     if (InOp.getValueType() != ScalarVT) {
16904       assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16905       return DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
16906     }
16907     return InOp;
16908   }
16909
16910   // extract_vector_elt of out-of-bounds element -> UNDEF
16911   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
16912   unsigned NumElts = VecVT.getVectorNumElements();
16913   if (IndexC && IndexC->getAPIntValue().uge(NumElts))
16914     return DAG.getUNDEF(ScalarVT);
16915
16916   // extract_vector_elt (build_vector x, y), 1 -> y
16917   if (IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR &&
16918       TLI.isTypeLegal(VecVT) &&
16919       (VecOp.hasOneUse() || TLI.aggressivelyPreferBuildVectorSources(VecVT))) {
16920     SDValue Elt = VecOp.getOperand(IndexC->getZExtValue());
16921     EVT InEltVT = Elt.getValueType();
16922
16923     // Sometimes build_vector's scalar input types do not match result type.
16924     if (ScalarVT == InEltVT)
16925       return Elt;
16926
16927     // TODO: It may be useful to truncate if free if the build_vector implicitly
16928     // converts.
16929   }
16930
16931   // TODO: These transforms should not require the 'hasOneUse' restriction, but
16932   // there are regressions on multiple targets without it. We can end up with a
16933   // mess of scalar and vector code if we reduce only part of the DAG to scalar.
16934   if (IndexC && VecOp.getOpcode() == ISD::BITCAST && VecVT.isInteger() &&
16935       VecOp.hasOneUse()) {
16936     // The vector index of the LSBs of the source depend on the endian-ness.
16937     bool IsLE = DAG.getDataLayout().isLittleEndian();
16938     unsigned ExtractIndex = IndexC->getZExtValue();
16939     // extract_elt (v2i32 (bitcast i64:x)), BCTruncElt -> i32 (trunc i64:x)
16940     unsigned BCTruncElt = IsLE ? 0 : NumElts - 1;
16941     SDValue BCSrc = VecOp.getOperand(0);
16942     if (ExtractIndex == BCTruncElt && BCSrc.getValueType().isScalarInteger())
16943       return DAG.getNode(ISD::TRUNCATE, DL, ScalarVT, BCSrc);
16944
16945     if (LegalTypes && BCSrc.getValueType().isInteger() &&
16946         BCSrc.getOpcode() == ISD::SCALAR_TO_VECTOR) {
16947       // ext_elt (bitcast (scalar_to_vec i64 X to v2i64) to v4i32), TruncElt -->
16948       // trunc i64 X to i32
16949       SDValue X = BCSrc.getOperand(0);
16950       assert(X.getValueType().isScalarInteger() && ScalarVT.isScalarInteger() &&
16951              "Extract element and scalar to vector can't change element type "
16952              "from FP to integer.");
16953       unsigned XBitWidth = X.getValueSizeInBits();
16954       unsigned VecEltBitWidth = VecVT.getScalarSizeInBits();
16955       BCTruncElt = IsLE ? 0 : XBitWidth / VecEltBitWidth - 1;
16956
16957       // An extract element return value type can be wider than its vector
16958       // operand element type. In that case, the high bits are undefined, so
16959       // it's possible that we may need to extend rather than truncate.
16960       if (ExtractIndex == BCTruncElt && XBitWidth > VecEltBitWidth) {
16961         assert(XBitWidth % VecEltBitWidth == 0 &&
16962                "Scalar bitwidth must be a multiple of vector element bitwidth");
16963         return DAG.getAnyExtOrTrunc(X, DL, ScalarVT);
16964       }
16965     }
16966   }
16967
16968   if (SDValue BO = scalarizeExtractedBinop(N, DAG, LegalOperations))
16969     return BO;
16970
16971   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
16972   // We only perform this optimization before the op legalization phase because
16973   // we may introduce new vector instructions which are not backed by TD
16974   // patterns. For example on AVX, extracting elements from a wide vector
16975   // without using extract_subvector. However, if we can find an underlying
16976   // scalar value, then we can always use that.
16977   if (IndexC && VecOp.getOpcode() == ISD::VECTOR_SHUFFLE) {
16978     auto *Shuf = cast<ShuffleVectorSDNode>(VecOp);
16979     // Find the new index to extract from.
16980     int OrigElt = Shuf->getMaskElt(IndexC->getZExtValue());
16981
16982     // Extracting an undef index is undef.
16983     if (OrigElt == -1)
16984       return DAG.getUNDEF(ScalarVT);
16985
16986     // Select the right vector half to extract from.
16987     SDValue SVInVec;
16988     if (OrigElt < (int)NumElts) {
16989       SVInVec = VecOp.getOperand(0);
16990     } else {
16991       SVInVec = VecOp.getOperand(1);
16992       OrigElt -= NumElts;
16993     }
16994
16995     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
16996       SDValue InOp = SVInVec.getOperand(OrigElt);
16997       if (InOp.getValueType() != ScalarVT) {
16998         assert(InOp.getValueType().isInteger() && ScalarVT.isInteger());
16999         InOp = DAG.getSExtOrTrunc(InOp, DL, ScalarVT);
17000       }
17001
17002       return InOp;
17003     }
17004
17005     // FIXME: We should handle recursing on other vector shuffles and
17006     // scalar_to_vector here as well.
17007
17008     if (!LegalOperations ||
17009         // FIXME: Should really be just isOperationLegalOrCustom.
17010         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, VecVT) ||
17011         TLI.isOperationExpand(ISD::VECTOR_SHUFFLE, VecVT)) {
17012       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17013       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SVInVec,
17014                          DAG.getConstant(OrigElt, DL, IndexTy));
17015     }
17016   }
17017
17018   // If only EXTRACT_VECTOR_ELT nodes use the source vector we can
17019   // simplify it based on the (valid) extraction indices.
17020   if (llvm::all_of(VecOp->uses(), [&](SDNode *Use) {
17021         return Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17022                Use->getOperand(0) == VecOp &&
17023                isa<ConstantSDNode>(Use->getOperand(1));
17024       })) {
17025     APInt DemandedElts = APInt::getNullValue(NumElts);
17026     for (SDNode *Use : VecOp->uses()) {
17027       auto *CstElt = cast<ConstantSDNode>(Use->getOperand(1));
17028       if (CstElt->getAPIntValue().ult(NumElts))
17029         DemandedElts.setBit(CstElt->getZExtValue());
17030     }
17031     if (SimplifyDemandedVectorElts(VecOp, DemandedElts, true)) {
17032       // We simplified the vector operand of this extract element. If this
17033       // extract is not dead, visit it again so it is folded properly.
17034       if (N->getOpcode() != ISD::DELETED_NODE)
17035         AddToWorklist(N);
17036       return SDValue(N, 0);
17037     }
17038   }
17039
17040   // Everything under here is trying to match an extract of a loaded value.
17041   // If the result of load has to be truncated, then it's not necessarily
17042   // profitable.
17043   bool BCNumEltsChanged = false;
17044   EVT ExtVT = VecVT.getVectorElementType();
17045   EVT LVT = ExtVT;
17046   if (ScalarVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, ScalarVT))
17047     return SDValue();
17048
17049   if (VecOp.getOpcode() == ISD::BITCAST) {
17050     // Don't duplicate a load with other uses.
17051     if (!VecOp.hasOneUse())
17052       return SDValue();
17053
17054     EVT BCVT = VecOp.getOperand(0).getValueType();
17055     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
17056       return SDValue();
17057     if (NumElts != BCVT.getVectorNumElements())
17058       BCNumEltsChanged = true;
17059     VecOp = VecOp.getOperand(0);
17060     ExtVT = BCVT.getVectorElementType();
17061   }
17062
17063   // extract (vector load $addr), i --> load $addr + i * size
17064   if (!LegalOperations && !IndexC && VecOp.hasOneUse() &&
17065       ISD::isNormalLoad(VecOp.getNode()) &&
17066       !Index->hasPredecessor(VecOp.getNode())) {
17067     auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
17068     if (VecLoad && !VecLoad->isVolatile())
17069       return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
17070   }
17071
17072   // Perform only after legalization to ensure build_vector / vector_shuffle
17073   // optimizations have already been done.
17074   if (!LegalOperations || !IndexC)
17075     return SDValue();
17076
17077   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
17078   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
17079   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
17080   int Elt = IndexC->getZExtValue();
17081   LoadSDNode *LN0 = nullptr;
17082   if (ISD::isNormalLoad(VecOp.getNode())) {
17083     LN0 = cast<LoadSDNode>(VecOp);
17084   } else if (VecOp.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17085              VecOp.getOperand(0).getValueType() == ExtVT &&
17086              ISD::isNormalLoad(VecOp.getOperand(0).getNode())) {
17087     // Don't duplicate a load with other uses.
17088     if (!VecOp.hasOneUse())
17089       return SDValue();
17090
17091     LN0 = cast<LoadSDNode>(VecOp.getOperand(0));
17092   }
17093   if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(VecOp)) {
17094     // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
17095     // =>
17096     // (load $addr+1*size)
17097
17098     // Don't duplicate a load with other uses.
17099     if (!VecOp.hasOneUse())
17100       return SDValue();
17101
17102     // If the bit convert changed the number of elements, it is unsafe
17103     // to examine the mask.
17104     if (BCNumEltsChanged)
17105       return SDValue();
17106
17107     // Select the input vector, guarding against out of range extract vector.
17108     int Idx = (Elt > (int)NumElts) ? -1 : Shuf->getMaskElt(Elt);
17109     VecOp = (Idx < (int)NumElts) ? VecOp.getOperand(0) : VecOp.getOperand(1);
17110
17111     if (VecOp.getOpcode() == ISD::BITCAST) {
17112       // Don't duplicate a load with other uses.
17113       if (!VecOp.hasOneUse())
17114         return SDValue();
17115
17116       VecOp = VecOp.getOperand(0);
17117     }
17118     if (ISD::isNormalLoad(VecOp.getNode())) {
17119       LN0 = cast<LoadSDNode>(VecOp);
17120       Elt = (Idx < (int)NumElts) ? Idx : Idx - (int)NumElts;
17121       Index = DAG.getConstant(Elt, DL, Index.getValueType());
17122     }
17123   }
17124
17125   // Make sure we found a non-volatile load and the extractelement is
17126   // the only use.
17127   if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
17128     return SDValue();
17129
17130   // If Idx was -1 above, Elt is going to be -1, so just return undef.
17131   if (Elt == -1)
17132     return DAG.getUNDEF(LVT);
17133
17134   return scalarizeExtractedVectorLoad(N, VecVT, Index, LN0);
17135 }
17136
17137 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
17138 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
17139   // We perform this optimization post type-legalization because
17140   // the type-legalizer often scalarizes integer-promoted vectors.
17141   // Performing this optimization before may create bit-casts which
17142   // will be type-legalized to complex code sequences.
17143   // We perform this optimization only before the operation legalizer because we
17144   // may introduce illegal operations.
17145   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
17146     return SDValue();
17147
17148   unsigned NumInScalars = N->getNumOperands();
17149   SDLoc DL(N);
17150   EVT VT = N->getValueType(0);
17151
17152   // Check to see if this is a BUILD_VECTOR of a bunch of values
17153   // which come from any_extend or zero_extend nodes. If so, we can create
17154   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
17155   // optimizations. We do not handle sign-extend because we can't fill the sign
17156   // using shuffles.
17157   EVT SourceType = MVT::Other;
17158   bool AllAnyExt = true;
17159
17160   for (unsigned i = 0; i != NumInScalars; ++i) {
17161     SDValue In = N->getOperand(i);
17162     // Ignore undef inputs.
17163     if (In.isUndef()) continue;
17164
17165     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
17166     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
17167
17168     // Abort if the element is not an extension.
17169     if (!ZeroExt && !AnyExt) {
17170       SourceType = MVT::Other;
17171       break;
17172     }
17173
17174     // The input is a ZeroExt or AnyExt. Check the original type.
17175     EVT InTy = In.getOperand(0).getValueType();
17176
17177     // Check that all of the widened source types are the same.
17178     if (SourceType == MVT::Other)
17179       // First time.
17180       SourceType = InTy;
17181     else if (InTy != SourceType) {
17182       // Multiple income types. Abort.
17183       SourceType = MVT::Other;
17184       break;
17185     }
17186
17187     // Check if all of the extends are ANY_EXTENDs.
17188     AllAnyExt &= AnyExt;
17189   }
17190
17191   // In order to have valid types, all of the inputs must be extended from the
17192   // same source type and all of the inputs must be any or zero extend.
17193   // Scalar sizes must be a power of two.
17194   EVT OutScalarTy = VT.getScalarType();
17195   bool ValidTypes = SourceType != MVT::Other &&
17196                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
17197                  isPowerOf2_32(SourceType.getSizeInBits());
17198
17199   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
17200   // turn into a single shuffle instruction.
17201   if (!ValidTypes)
17202     return SDValue();
17203
17204   bool isLE = DAG.getDataLayout().isLittleEndian();
17205   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
17206   assert(ElemRatio > 1 && "Invalid element size ratio");
17207   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
17208                                DAG.getConstant(0, DL, SourceType);
17209
17210   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
17211   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
17212
17213   // Populate the new build_vector
17214   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
17215     SDValue Cast = N->getOperand(i);
17216     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
17217             Cast.getOpcode() == ISD::ZERO_EXTEND ||
17218             Cast.isUndef()) && "Invalid cast opcode");
17219     SDValue In;
17220     if (Cast.isUndef())
17221       In = DAG.getUNDEF(SourceType);
17222     else
17223       In = Cast->getOperand(0);
17224     unsigned Index = isLE ? (i * ElemRatio) :
17225                             (i * ElemRatio + (ElemRatio - 1));
17226
17227     assert(Index < Ops.size() && "Invalid index");
17228     Ops[Index] = In;
17229   }
17230
17231   // The type of the new BUILD_VECTOR node.
17232   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
17233   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
17234          "Invalid vector size");
17235   // Check if the new vector type is legal.
17236   if (!isTypeLegal(VecVT) ||
17237       (!TLI.isOperationLegal(ISD::BUILD_VECTOR, VecVT) &&
17238        TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)))
17239     return SDValue();
17240
17241   // Make the new BUILD_VECTOR.
17242   SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
17243
17244   // The new BUILD_VECTOR node has the potential to be further optimized.
17245   AddToWorklist(BV.getNode());
17246   // Bitcast to the desired type.
17247   return DAG.getBitcast(VT, BV);
17248 }
17249
17250 SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
17251                                            ArrayRef<int> VectorMask,
17252                                            SDValue VecIn1, SDValue VecIn2,
17253                                            unsigned LeftIdx, bool DidSplitVec) {
17254   MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17255   SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
17256
17257   EVT VT = N->getValueType(0);
17258   EVT InVT1 = VecIn1.getValueType();
17259   EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
17260
17261   unsigned NumElems = VT.getVectorNumElements();
17262   unsigned ShuffleNumElems = NumElems;
17263
17264   // If we artificially split a vector in two already, then the offsets in the
17265   // operands will all be based off of VecIn1, even those in VecIn2.
17266   unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
17267
17268   // We can't generate a shuffle node with mismatched input and output types.
17269   // Try to make the types match the type of the output.
17270   if (InVT1 != VT || InVT2 != VT) {
17271     if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
17272       // If the output vector length is a multiple of both input lengths,
17273       // we can concatenate them and pad the rest with undefs.
17274       unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
17275       assert(NumConcats >= 2 && "Concat needs at least two inputs!");
17276       SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
17277       ConcatOps[0] = VecIn1;
17278       ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
17279       VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17280       VecIn2 = SDValue();
17281     } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
17282       if (!TLI.isExtractSubvectorCheap(VT, InVT1, NumElems))
17283         return SDValue();
17284
17285       if (!VecIn2.getNode()) {
17286         // If we only have one input vector, and it's twice the size of the
17287         // output, split it in two.
17288         VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
17289                              DAG.getConstant(NumElems, DL, IdxTy));
17290         VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
17291         // Since we now have shorter input vectors, adjust the offset of the
17292         // second vector's start.
17293         Vec2Offset = NumElems;
17294       } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
17295         // VecIn1 is wider than the output, and we have another, possibly
17296         // smaller input. Pad the smaller input with undefs, shuffle at the
17297         // input vector width, and extract the output.
17298         // The shuffle type is different than VT, so check legality again.
17299         if (LegalOperations &&
17300             !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
17301           return SDValue();
17302
17303         // Legalizing INSERT_SUBVECTOR is tricky - you basically have to
17304         // lower it back into a BUILD_VECTOR. So if the inserted type is
17305         // illegal, don't even try.
17306         if (InVT1 != InVT2) {
17307           if (!TLI.isTypeLegal(InVT2))
17308             return SDValue();
17309           VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
17310                                DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
17311         }
17312         ShuffleNumElems = NumElems * 2;
17313       } else {
17314         // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
17315         // than VecIn1. We can't handle this for now - this case will disappear
17316         // when we start sorting the vectors by type.
17317         return SDValue();
17318       }
17319     } else if (InVT2.getSizeInBits() * 2 == VT.getSizeInBits() &&
17320                InVT1.getSizeInBits() == VT.getSizeInBits()) {
17321       SmallVector<SDValue, 2> ConcatOps(2, DAG.getUNDEF(InVT2));
17322       ConcatOps[0] = VecIn2;
17323       VecIn2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
17324     } else {
17325       // TODO: Support cases where the length mismatch isn't exactly by a
17326       // factor of 2.
17327       // TODO: Move this check upwards, so that if we have bad type
17328       // mismatches, we don't create any DAG nodes.
17329       return SDValue();
17330     }
17331   }
17332
17333   // Initialize mask to undef.
17334   SmallVector<int, 8> Mask(ShuffleNumElems, -1);
17335
17336   // Only need to run up to the number of elements actually used, not the
17337   // total number of elements in the shuffle - if we are shuffling a wider
17338   // vector, the high lanes should be set to undef.
17339   for (unsigned i = 0; i != NumElems; ++i) {
17340     if (VectorMask[i] <= 0)
17341       continue;
17342
17343     unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
17344     if (VectorMask[i] == (int)LeftIdx) {
17345       Mask[i] = ExtIndex;
17346     } else if (VectorMask[i] == (int)LeftIdx + 1) {
17347       Mask[i] = Vec2Offset + ExtIndex;
17348     }
17349   }
17350
17351   // The type the input vectors may have changed above.
17352   InVT1 = VecIn1.getValueType();
17353
17354   // If we already have a VecIn2, it should have the same type as VecIn1.
17355   // If we don't, get an undef/zero vector of the appropriate type.
17356   VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
17357   assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
17358
17359   SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
17360   if (ShuffleNumElems > NumElems)
17361     Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
17362
17363   return Shuffle;
17364 }
17365
17366 static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
17367   assert(BV->getOpcode() == ISD::BUILD_VECTOR && "Expected build vector");
17368
17369   // First, determine where the build vector is not undef.
17370   // TODO: We could extend this to handle zero elements as well as undefs.
17371   int NumBVOps = BV->getNumOperands();
17372   int ZextElt = -1;
17373   for (int i = 0; i != NumBVOps; ++i) {
17374     SDValue Op = BV->getOperand(i);
17375     if (Op.isUndef())
17376       continue;
17377     if (ZextElt == -1)
17378       ZextElt = i;
17379     else
17380       return SDValue();
17381   }
17382   // Bail out if there's no non-undef element.
17383   if (ZextElt == -1)
17384     return SDValue();
17385
17386   // The build vector contains some number of undef elements and exactly
17387   // one other element. That other element must be a zero-extended scalar
17388   // extracted from a vector at a constant index to turn this into a shuffle.
17389   // Also, require that the build vector does not implicitly truncate/extend
17390   // its elements.
17391   // TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
17392   EVT VT = BV->getValueType(0);
17393   SDValue Zext = BV->getOperand(ZextElt);
17394   if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
17395       Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17396       !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
17397       Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
17398     return SDValue();
17399
17400   // The zero-extend must be a multiple of the source size, and we must be
17401   // building a vector of the same size as the source of the extract element.
17402   SDValue Extract = Zext.getOperand(0);
17403   unsigned DestSize = Zext.getValueSizeInBits();
17404   unsigned SrcSize = Extract.getValueSizeInBits();
17405   if (DestSize % SrcSize != 0 ||
17406       Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
17407     return SDValue();
17408
17409   // Create a shuffle mask that will combine the extracted element with zeros
17410   // and undefs.
17411   int ZextRatio = DestSize / SrcSize;
17412   int NumMaskElts = NumBVOps * ZextRatio;
17413   SmallVector<int, 32> ShufMask(NumMaskElts, -1);
17414   for (int i = 0; i != NumMaskElts; ++i) {
17415     if (i / ZextRatio == ZextElt) {
17416       // The low bits of the (potentially translated) extracted element map to
17417       // the source vector. The high bits map to zero. We will use a zero vector
17418       // as the 2nd source operand of the shuffle, so use the 1st element of
17419       // that vector (mask value is number-of-elements) for the high bits.
17420       if (i % ZextRatio == 0)
17421         ShufMask[i] = Extract.getConstantOperandVal(1);
17422       else
17423         ShufMask[i] = NumMaskElts;
17424     }
17425
17426     // Undef elements of the build vector remain undef because we initialize
17427     // the shuffle mask with -1.
17428   }
17429
17430   // buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
17431   // bitcast (shuffle V, ZeroVec, VectorMask)
17432   SDLoc DL(BV);
17433   EVT VecVT = Extract.getOperand(0).getValueType();
17434   SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
17435   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17436   SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
17437                                              ZeroVec, ShufMask, DAG);
17438   if (!Shuf)
17439     return SDValue();
17440   return DAG.getBitcast(VT, Shuf);
17441 }
17442
17443 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
17444 // operations. If the types of the vectors we're extracting from allow it,
17445 // turn this into a vector_shuffle node.
17446 SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
17447   SDLoc DL(N);
17448   EVT VT = N->getValueType(0);
17449
17450   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
17451   if (!isTypeLegal(VT))
17452     return SDValue();
17453
17454   if (SDValue V = reduceBuildVecToShuffleWithZero(N, DAG))
17455     return V;
17456
17457   // May only combine to shuffle after legalize if shuffle is legal.
17458   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
17459     return SDValue();
17460
17461   bool UsesZeroVector = false;
17462   unsigned NumElems = N->getNumOperands();
17463
17464   // Record, for each element of the newly built vector, which input vector
17465   // that element comes from. -1 stands for undef, 0 for the zero vector,
17466   // and positive values for the input vectors.
17467   // VectorMask maps each element to its vector number, and VecIn maps vector
17468   // numbers to their initial SDValues.
17469
17470   SmallVector<int, 8> VectorMask(NumElems, -1);
17471   SmallVector<SDValue, 8> VecIn;
17472   VecIn.push_back(SDValue());
17473
17474   for (unsigned i = 0; i != NumElems; ++i) {
17475     SDValue Op = N->getOperand(i);
17476
17477     if (Op.isUndef())
17478       continue;
17479
17480     // See if we can use a blend with a zero vector.
17481     // TODO: Should we generalize this to a blend with an arbitrary constant
17482     // vector?
17483     if (isNullConstant(Op) || isNullFPConstant(Op)) {
17484       UsesZeroVector = true;
17485       VectorMask[i] = 0;
17486       continue;
17487     }
17488
17489     // Not an undef or zero. If the input is something other than an
17490     // EXTRACT_VECTOR_ELT with an in-range constant index, bail out.
17491     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
17492         !isa<ConstantSDNode>(Op.getOperand(1)))
17493       return SDValue();
17494     SDValue ExtractedFromVec = Op.getOperand(0);
17495
17496     const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
17497     if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
17498       return SDValue();
17499
17500     // All inputs must have the same element type as the output.
17501     if (VT.getVectorElementType() !=
17502         ExtractedFromVec.getValueType().getVectorElementType())
17503       return SDValue();
17504
17505     // Have we seen this input vector before?
17506     // The vectors are expected to be tiny (usually 1 or 2 elements), so using
17507     // a map back from SDValues to numbers isn't worth it.
17508     unsigned Idx = std::distance(
17509         VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
17510     if (Idx == VecIn.size())
17511       VecIn.push_back(ExtractedFromVec);
17512
17513     VectorMask[i] = Idx;
17514   }
17515
17516   // If we didn't find at least one input vector, bail out.
17517   if (VecIn.size() < 2)
17518     return SDValue();
17519
17520   // If all the Operands of BUILD_VECTOR extract from same
17521   // vector, then split the vector efficiently based on the maximum
17522   // vector access index and adjust the VectorMask and
17523   // VecIn accordingly.
17524   bool DidSplitVec = false;
17525   if (VecIn.size() == 2) {
17526     unsigned MaxIndex = 0;
17527     unsigned NearestPow2 = 0;
17528     SDValue Vec = VecIn.back();
17529     EVT InVT = Vec.getValueType();
17530     MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
17531     SmallVector<unsigned, 8> IndexVec(NumElems, 0);
17532
17533     for (unsigned i = 0; i < NumElems; i++) {
17534       if (VectorMask[i] <= 0)
17535         continue;
17536       unsigned Index = N->getOperand(i).getConstantOperandVal(1);
17537       IndexVec[i] = Index;
17538       MaxIndex = std::max(MaxIndex, Index);
17539     }
17540
17541     NearestPow2 = PowerOf2Ceil(MaxIndex);
17542     if (InVT.isSimple() && NearestPow2 > 2 && MaxIndex < NearestPow2 &&
17543         NumElems * 2 < NearestPow2) {
17544       unsigned SplitSize = NearestPow2 / 2;
17545       EVT SplitVT = EVT::getVectorVT(*DAG.getContext(),
17546                                      InVT.getVectorElementType(), SplitSize);
17547       if (TLI.isTypeLegal(SplitVT)) {
17548         SDValue VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17549                                      DAG.getConstant(SplitSize, DL, IdxTy));
17550         SDValue VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, Vec,
17551                                      DAG.getConstant(0, DL, IdxTy));
17552         VecIn.pop_back();
17553         VecIn.push_back(VecIn1);
17554         VecIn.push_back(VecIn2);
17555         DidSplitVec = true;
17556
17557         for (unsigned i = 0; i < NumElems; i++) {
17558           if (VectorMask[i] <= 0)
17559             continue;
17560           VectorMask[i] = (IndexVec[i] < SplitSize) ? 1 : 2;
17561         }
17562       }
17563     }
17564   }
17565
17566   // TODO: We want to sort the vectors by descending length, so that adjacent
17567   // pairs have similar length, and the longer vector is always first in the
17568   // pair.
17569
17570   // TODO: Should this fire if some of the input vectors has illegal type (like
17571   // it does now), or should we let legalization run its course first?
17572
17573   // Shuffle phase:
17574   // Take pairs of vectors, and shuffle them so that the result has elements
17575   // from these vectors in the correct places.
17576   // For example, given:
17577   // t10: i32 = extract_vector_elt t1, Constant:i64<0>
17578   // t11: i32 = extract_vector_elt t2, Constant:i64<0>
17579   // t12: i32 = extract_vector_elt t3, Constant:i64<0>
17580   // t13: i32 = extract_vector_elt t1, Constant:i64<1>
17581   // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
17582   // We will generate:
17583   // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
17584   // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
17585   SmallVector<SDValue, 4> Shuffles;
17586   for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
17587     unsigned LeftIdx = 2 * In + 1;
17588     SDValue VecLeft = VecIn[LeftIdx];
17589     SDValue VecRight =
17590         (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
17591
17592     if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
17593                                                 VecRight, LeftIdx, DidSplitVec))
17594       Shuffles.push_back(Shuffle);
17595     else
17596       return SDValue();
17597   }
17598
17599   // If we need the zero vector as an "ingredient" in the blend tree, add it
17600   // to the list of shuffles.
17601   if (UsesZeroVector)
17602     Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
17603                                       : DAG.getConstantFP(0.0, DL, VT));
17604
17605   // If we only have one shuffle, we're done.
17606   if (Shuffles.size() == 1)
17607     return Shuffles[0];
17608
17609   // Update the vector mask to point to the post-shuffle vectors.
17610   for (int &Vec : VectorMask)
17611     if (Vec == 0)
17612       Vec = Shuffles.size() - 1;
17613     else
17614       Vec = (Vec - 1) / 2;
17615
17616   // More than one shuffle. Generate a binary tree of blends, e.g. if from
17617   // the previous step we got the set of shuffles t10, t11, t12, t13, we will
17618   // generate:
17619   // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
17620   // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
17621   // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
17622   // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
17623   // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
17624   // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
17625   // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
17626
17627   // Make sure the initial size of the shuffle list is even.
17628   if (Shuffles.size() % 2)
17629     Shuffles.push_back(DAG.getUNDEF(VT));
17630
17631   for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
17632     if (CurSize % 2) {
17633       Shuffles[CurSize] = DAG.getUNDEF(VT);
17634       CurSize++;
17635     }
17636     for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
17637       int Left = 2 * In;
17638       int Right = 2 * In + 1;
17639       SmallVector<int, 8> Mask(NumElems, -1);
17640       for (unsigned i = 0; i != NumElems; ++i) {
17641         if (VectorMask[i] == Left) {
17642           Mask[i] = i;
17643           VectorMask[i] = In;
17644         } else if (VectorMask[i] == Right) {
17645           Mask[i] = i + NumElems;
17646           VectorMask[i] = In;
17647         }
17648       }
17649
17650       Shuffles[In] =
17651           DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
17652     }
17653   }
17654   return Shuffles[0];
17655 }
17656
17657 // Try to turn a build vector of zero extends of extract vector elts into a
17658 // a vector zero extend and possibly an extract subvector.
17659 // TODO: Support sign extend?
17660 // TODO: Allow undef elements?
17661 SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
17662   if (LegalOperations)
17663     return SDValue();
17664
17665   EVT VT = N->getValueType(0);
17666
17667   bool FoundZeroExtend = false;
17668   SDValue Op0 = N->getOperand(0);
17669   auto checkElem = [&](SDValue Op) -> int64_t {
17670     unsigned Opc = Op.getOpcode();
17671     FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
17672     if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
17673         Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
17674         Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
17675       if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
17676         return C->getZExtValue();
17677     return -1;
17678   };
17679
17680   // Make sure the first element matches
17681   // (zext (extract_vector_elt X, C))
17682   int64_t Offset = checkElem(Op0);
17683   if (Offset < 0)
17684     return SDValue();
17685
17686   unsigned NumElems = N->getNumOperands();
17687   SDValue In = Op0.getOperand(0).getOperand(0);
17688   EVT InSVT = In.getValueType().getScalarType();
17689   EVT InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumElems);
17690
17691   // Don't create an illegal input type after type legalization.
17692   if (LegalTypes && !TLI.isTypeLegal(InVT))
17693     return SDValue();
17694
17695   // Ensure all the elements come from the same vector and are adjacent.
17696   for (unsigned i = 1; i != NumElems; ++i) {
17697     if ((Offset + i) != checkElem(N->getOperand(i)))
17698       return SDValue();
17699   }
17700
17701   SDLoc DL(N);
17702   In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
17703                    Op0.getOperand(0).getOperand(1));
17704   return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
17705                      VT, In);
17706 }
17707
17708 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
17709   EVT VT = N->getValueType(0);
17710
17711   // A vector built entirely of undefs is undef.
17712   if (ISD::allOperandsUndef(N))
17713     return DAG.getUNDEF(VT);
17714
17715   // If this is a splat of a bitcast from another vector, change to a
17716   // concat_vector.
17717   // For example:
17718   //   (build_vector (i64 (bitcast (v2i32 X))), (i64 (bitcast (v2i32 X)))) ->
17719   //     (v2i64 (bitcast (concat_vectors (v2i32 X), (v2i32 X))))
17720   //
17721   // If X is a build_vector itself, the concat can become a larger build_vector.
17722   // TODO: Maybe this is useful for non-splat too?
17723   if (!LegalOperations) {
17724     if (SDValue Splat = cast<BuildVectorSDNode>(N)->getSplatValue()) {
17725       Splat = peekThroughBitcasts(Splat);
17726       EVT SrcVT = Splat.getValueType();
17727       if (SrcVT.isVector()) {
17728         unsigned NumElts = N->getNumOperands() * SrcVT.getVectorNumElements();
17729         EVT NewVT = EVT::getVectorVT(*DAG.getContext(),
17730                                      SrcVT.getVectorElementType(), NumElts);
17731         if (!LegalTypes || TLI.isTypeLegal(NewVT)) {
17732           SmallVector<SDValue, 8> Ops(N->getNumOperands(), Splat);
17733           SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N),
17734                                        NewVT, Ops);
17735           return DAG.getBitcast(VT, Concat);
17736         }
17737       }
17738     }
17739   }
17740
17741   // Check if we can express BUILD VECTOR via subvector extract.
17742   if (!LegalTypes && (N->getNumOperands() > 1)) {
17743     SDValue Op0 = N->getOperand(0);
17744     auto checkElem = [&](SDValue Op) -> uint64_t {
17745       if ((Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) &&
17746           (Op0.getOperand(0) == Op.getOperand(0)))
17747         if (auto CNode = dyn_cast<ConstantSDNode>(Op.getOperand(1)))
17748           return CNode->getZExtValue();
17749       return -1;
17750     };
17751
17752     int Offset = checkElem(Op0);
17753     for (unsigned i = 0; i < N->getNumOperands(); ++i) {
17754       if (Offset + i != checkElem(N->getOperand(i))) {
17755         Offset = -1;
17756         break;
17757       }
17758     }
17759
17760     if ((Offset == 0) &&
17761         (Op0.getOperand(0).getValueType() == N->getValueType(0)))
17762       return Op0.getOperand(0);
17763     if ((Offset != -1) &&
17764         ((Offset % N->getValueType(0).getVectorNumElements()) ==
17765          0)) // IDX must be multiple of output size.
17766       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0),
17767                          Op0.getOperand(0), Op0.getOperand(1));
17768   }
17769
17770   if (SDValue V = convertBuildVecZextToZext(N))
17771     return V;
17772
17773   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
17774     return V;
17775
17776   if (SDValue V = reduceBuildVecToShuffle(N))
17777     return V;
17778
17779   return SDValue();
17780 }
17781
17782 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
17783   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17784   EVT OpVT = N->getOperand(0).getValueType();
17785
17786   // If the operands are legal vectors, leave them alone.
17787   if (TLI.isTypeLegal(OpVT))
17788     return SDValue();
17789
17790   SDLoc DL(N);
17791   EVT VT = N->getValueType(0);
17792   SmallVector<SDValue, 8> Ops;
17793
17794   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
17795   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17796
17797   // Keep track of what we encounter.
17798   bool AnyInteger = false;
17799   bool AnyFP = false;
17800   for (const SDValue &Op : N->ops()) {
17801     if (ISD::BITCAST == Op.getOpcode() &&
17802         !Op.getOperand(0).getValueType().isVector())
17803       Ops.push_back(Op.getOperand(0));
17804     else if (ISD::UNDEF == Op.getOpcode())
17805       Ops.push_back(ScalarUndef);
17806     else
17807       return SDValue();
17808
17809     // Note whether we encounter an integer or floating point scalar.
17810     // If it's neither, bail out, it could be something weird like x86mmx.
17811     EVT LastOpVT = Ops.back().getValueType();
17812     if (LastOpVT.isFloatingPoint())
17813       AnyFP = true;
17814     else if (LastOpVT.isInteger())
17815       AnyInteger = true;
17816     else
17817       return SDValue();
17818   }
17819
17820   // If any of the operands is a floating point scalar bitcast to a vector,
17821   // use floating point types throughout, and bitcast everything.
17822   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
17823   if (AnyFP) {
17824     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
17825     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
17826     if (AnyInteger) {
17827       for (SDValue &Op : Ops) {
17828         if (Op.getValueType() == SVT)
17829           continue;
17830         if (Op.isUndef())
17831           Op = ScalarUndef;
17832         else
17833           Op = DAG.getBitcast(SVT, Op);
17834       }
17835     }
17836   }
17837
17838   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
17839                                VT.getSizeInBits() / SVT.getSizeInBits());
17840   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
17841 }
17842
17843 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
17844 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
17845 // most two distinct vectors the same size as the result, attempt to turn this
17846 // into a legal shuffle.
17847 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
17848   EVT VT = N->getValueType(0);
17849   EVT OpVT = N->getOperand(0).getValueType();
17850   int NumElts = VT.getVectorNumElements();
17851   int NumOpElts = OpVT.getVectorNumElements();
17852
17853   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
17854   SmallVector<int, 8> Mask;
17855
17856   for (SDValue Op : N->ops()) {
17857     Op = peekThroughBitcasts(Op);
17858
17859     // UNDEF nodes convert to UNDEF shuffle mask values.
17860     if (Op.isUndef()) {
17861       Mask.append((unsigned)NumOpElts, -1);
17862       continue;
17863     }
17864
17865     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
17866       return SDValue();
17867
17868     // What vector are we extracting the subvector from and at what index?
17869     SDValue ExtVec = Op.getOperand(0);
17870
17871     // We want the EVT of the original extraction to correctly scale the
17872     // extraction index.
17873     EVT ExtVT = ExtVec.getValueType();
17874     ExtVec = peekThroughBitcasts(ExtVec);
17875
17876     // UNDEF nodes convert to UNDEF shuffle mask values.
17877     if (ExtVec.isUndef()) {
17878       Mask.append((unsigned)NumOpElts, -1);
17879       continue;
17880     }
17881
17882     if (!isa<ConstantSDNode>(Op.getOperand(1)))
17883       return SDValue();
17884     int ExtIdx = Op.getConstantOperandVal(1);
17885
17886     // Ensure that we are extracting a subvector from a vector the same
17887     // size as the result.
17888     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
17889       return SDValue();
17890
17891     // Scale the subvector index to account for any bitcast.
17892     int NumExtElts = ExtVT.getVectorNumElements();
17893     if (0 == (NumExtElts % NumElts))
17894       ExtIdx /= (NumExtElts / NumElts);
17895     else if (0 == (NumElts % NumExtElts))
17896       ExtIdx *= (NumElts / NumExtElts);
17897     else
17898       return SDValue();
17899
17900     // At most we can reference 2 inputs in the final shuffle.
17901     if (SV0.isUndef() || SV0 == ExtVec) {
17902       SV0 = ExtVec;
17903       for (int i = 0; i != NumOpElts; ++i)
17904         Mask.push_back(i + ExtIdx);
17905     } else if (SV1.isUndef() || SV1 == ExtVec) {
17906       SV1 = ExtVec;
17907       for (int i = 0; i != NumOpElts; ++i)
17908         Mask.push_back(i + ExtIdx + NumElts);
17909     } else {
17910       return SDValue();
17911     }
17912   }
17913
17914   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
17915   return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
17916                                      DAG.getBitcast(VT, SV1), Mask, DAG);
17917 }
17918
17919 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
17920   // If we only have one input vector, we don't need to do any concatenation.
17921   if (N->getNumOperands() == 1)
17922     return N->getOperand(0);
17923
17924   // Check if all of the operands are undefs.
17925   EVT VT = N->getValueType(0);
17926   if (ISD::allOperandsUndef(N))
17927     return DAG.getUNDEF(VT);
17928
17929   // Optimize concat_vectors where all but the first of the vectors are undef.
17930   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
17931         return Op.isUndef();
17932       })) {
17933     SDValue In = N->getOperand(0);
17934     assert(In.getValueType().isVector() && "Must concat vectors");
17935
17936     // If the input is a concat_vectors, just make a larger concat by padding
17937     // with smaller undefs.
17938     if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
17939       unsigned NumOps = N->getNumOperands() * In.getNumOperands();
17940       SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
17941       Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
17942       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
17943     }
17944
17945     SDValue Scalar = peekThroughOneUseBitcasts(In);
17946
17947     // concat_vectors(scalar_to_vector(scalar), undef) ->
17948     //     scalar_to_vector(scalar)
17949     if (!LegalOperations && Scalar.getOpcode() == ISD::SCALAR_TO_VECTOR &&
17950          Scalar.hasOneUse()) {
17951       EVT SVT = Scalar.getValueType().getVectorElementType();
17952       if (SVT == Scalar.getOperand(0).getValueType())
17953         Scalar = Scalar.getOperand(0);
17954     }
17955
17956     // concat_vectors(scalar, undef) -> scalar_to_vector(scalar)
17957     if (!Scalar.getValueType().isVector()) {
17958       // If the bitcast type isn't legal, it might be a trunc of a legal type;
17959       // look through the trunc so we can still do the transform:
17960       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
17961       if (Scalar->getOpcode() == ISD::TRUNCATE &&
17962           !TLI.isTypeLegal(Scalar.getValueType()) &&
17963           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
17964         Scalar = Scalar->getOperand(0);
17965
17966       EVT SclTy = Scalar.getValueType();
17967
17968       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
17969         return SDValue();
17970
17971       // Bail out if the vector size is not a multiple of the scalar size.
17972       if (VT.getSizeInBits() % SclTy.getSizeInBits())
17973         return SDValue();
17974
17975       unsigned VNTNumElms = VT.getSizeInBits() / SclTy.getSizeInBits();
17976       if (VNTNumElms < 2)
17977         return SDValue();
17978
17979       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, VNTNumElms);
17980       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
17981         return SDValue();
17982
17983       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
17984       return DAG.getBitcast(VT, Res);
17985     }
17986   }
17987
17988   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
17989   // We have already tested above for an UNDEF only concatenation.
17990   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
17991   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
17992   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
17993     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
17994   };
17995   if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
17996     SmallVector<SDValue, 8> Opnds;
17997     EVT SVT = VT.getScalarType();
17998
17999     EVT MinVT = SVT;
18000     if (!SVT.isFloatingPoint()) {
18001       // If BUILD_VECTOR are from built from integer, they may have different
18002       // operand types. Get the smallest type and truncate all operands to it.
18003       bool FoundMinVT = false;
18004       for (const SDValue &Op : N->ops())
18005         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
18006           EVT OpSVT = Op.getOperand(0).getValueType();
18007           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
18008           FoundMinVT = true;
18009         }
18010       assert(FoundMinVT && "Concat vector type mismatch");
18011     }
18012
18013     for (const SDValue &Op : N->ops()) {
18014       EVT OpVT = Op.getValueType();
18015       unsigned NumElts = OpVT.getVectorNumElements();
18016
18017       if (ISD::UNDEF == Op.getOpcode())
18018         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
18019
18020       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
18021         if (SVT.isFloatingPoint()) {
18022           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
18023           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
18024         } else {
18025           for (unsigned i = 0; i != NumElts; ++i)
18026             Opnds.push_back(
18027                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
18028         }
18029       }
18030     }
18031
18032     assert(VT.getVectorNumElements() == Opnds.size() &&
18033            "Concat vector type mismatch");
18034     return DAG.getBuildVector(VT, SDLoc(N), Opnds);
18035   }
18036
18037   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
18038   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
18039     return V;
18040
18041   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
18042   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
18043     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
18044       return V;
18045
18046   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
18047   // nodes often generate nop CONCAT_VECTOR nodes.
18048   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
18049   // place the incoming vectors at the exact same location.
18050   SDValue SingleSource = SDValue();
18051   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
18052
18053   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
18054     SDValue Op = N->getOperand(i);
18055
18056     if (Op.isUndef())
18057       continue;
18058
18059     // Check if this is the identity extract:
18060     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
18061       return SDValue();
18062
18063     // Find the single incoming vector for the extract_subvector.
18064     if (SingleSource.getNode()) {
18065       if (Op.getOperand(0) != SingleSource)
18066         return SDValue();
18067     } else {
18068       SingleSource = Op.getOperand(0);
18069
18070       // Check the source type is the same as the type of the result.
18071       // If not, this concat may extend the vector, so we can not
18072       // optimize it away.
18073       if (SingleSource.getValueType() != N->getValueType(0))
18074         return SDValue();
18075     }
18076
18077     auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
18078     // The extract index must be constant.
18079     if (!CS)
18080       return SDValue();
18081
18082     // Check that we are reading from the identity index.
18083     unsigned IdentityIndex = i * PartNumElem;
18084     if (CS->getAPIntValue() != IdentityIndex)
18085       return SDValue();
18086   }
18087
18088   if (SingleSource.getNode())
18089     return SingleSource;
18090
18091   return SDValue();
18092 }
18093
18094 // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
18095 // if the subvector can be sourced for free.
18096 static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
18097   if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
18098       V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
18099     return V.getOperand(1);
18100   }
18101   auto *IndexC = dyn_cast<ConstantSDNode>(Index);
18102   if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
18103       V.getOperand(0).getValueType() == SubVT &&
18104       (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
18105     uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
18106     return V.getOperand(SubIdx);
18107   }
18108   return SDValue();
18109 }
18110
18111 static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
18112                                               SelectionDAG &DAG) {
18113   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18114   SDValue BinOp = Extract->getOperand(0);
18115   unsigned BinOpcode = BinOp.getOpcode();
18116   if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
18117     return SDValue();
18118
18119   EVT VecVT = BinOp.getValueType();
18120   SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
18121   if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
18122     return SDValue();
18123
18124   SDValue Index = Extract->getOperand(1);
18125   EVT SubVT = Extract->getValueType(0);
18126   if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
18127     return SDValue();
18128
18129   SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
18130   SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
18131
18132   // TODO: We could handle the case where only 1 operand is being inserted by
18133   //       creating an extract of the other operand, but that requires checking
18134   //       number of uses and/or costs.
18135   if (!Sub0 || !Sub1)
18136     return SDValue();
18137
18138   // We are inserting both operands of the wide binop only to extract back
18139   // to the narrow vector size. Eliminate all of the insert/extract:
18140   // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
18141   return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
18142                      BinOp->getFlags());
18143 }
18144
18145 /// If we are extracting a subvector produced by a wide binary operator try
18146 /// to use a narrow binary operator and/or avoid concatenation and extraction.
18147 static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
18148   // TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
18149   // some of these bailouts with other transforms.
18150
18151   if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
18152     return V;
18153
18154   // The extract index must be a constant, so we can map it to a concat operand.
18155   auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18156   if (!ExtractIndexC)
18157     return SDValue();
18158
18159   // We are looking for an optionally bitcasted wide vector binary operator
18160   // feeding an extract subvector.
18161   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18162   SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
18163   unsigned BOpcode = BinOp.getOpcode();
18164   if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
18165     return SDValue();
18166
18167   // The binop must be a vector type, so we can extract some fraction of it.
18168   EVT WideBVT = BinOp.getValueType();
18169   if (!WideBVT.isVector())
18170     return SDValue();
18171
18172   EVT VT = Extract->getValueType(0);
18173   unsigned ExtractIndex = ExtractIndexC->getZExtValue();
18174   assert(ExtractIndex % VT.getVectorNumElements() == 0 &&
18175          "Extract index is not a multiple of the vector length.");
18176
18177   // Bail out if this is not a proper multiple width extraction.
18178   unsigned WideWidth = WideBVT.getSizeInBits();
18179   unsigned NarrowWidth = VT.getSizeInBits();
18180   if (WideWidth % NarrowWidth != 0)
18181     return SDValue();
18182
18183   // Bail out if we are extracting a fraction of a single operation. This can
18184   // occur because we potentially looked through a bitcast of the binop.
18185   unsigned NarrowingRatio = WideWidth / NarrowWidth;
18186   unsigned WideNumElts = WideBVT.getVectorNumElements();
18187   if (WideNumElts % NarrowingRatio != 0)
18188     return SDValue();
18189
18190   // Bail out if the target does not support a narrower version of the binop.
18191   EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
18192                                    WideNumElts / NarrowingRatio);
18193   if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
18194     return SDValue();
18195
18196   // If extraction is cheap, we don't need to look at the binop operands
18197   // for concat ops. The narrow binop alone makes this transform profitable.
18198   // We can't just reuse the original extract index operand because we may have
18199   // bitcasted.
18200   unsigned ConcatOpNum = ExtractIndex / VT.getVectorNumElements();
18201   unsigned ExtBOIdx = ConcatOpNum * NarrowBVT.getVectorNumElements();
18202   EVT ExtBOIdxVT = Extract->getOperand(1).getValueType();
18203   if (TLI.isExtractSubvectorCheap(NarrowBVT, WideBVT, ExtBOIdx) &&
18204       BinOp.hasOneUse() && Extract->getOperand(0)->hasOneUse()) {
18205     // extract (binop B0, B1), N --> binop (extract B0, N), (extract B1, N)
18206     SDLoc DL(Extract);
18207     SDValue NewExtIndex = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18208     SDValue X = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18209                             BinOp.getOperand(0), NewExtIndex);
18210     SDValue Y = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18211                             BinOp.getOperand(1), NewExtIndex);
18212     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y,
18213                                       BinOp.getNode()->getFlags());
18214     return DAG.getBitcast(VT, NarrowBinOp);
18215   }
18216
18217   // Only handle the case where we are doubling and then halving. A larger ratio
18218   // may require more than two narrow binops to replace the wide binop.
18219   if (NarrowingRatio != 2)
18220     return SDValue();
18221
18222   // TODO: The motivating case for this transform is an x86 AVX1 target. That
18223   // target has temptingly almost legal versions of bitwise logic ops in 256-bit
18224   // flavors, but no other 256-bit integer support. This could be extended to
18225   // handle any binop, but that may require fixing/adding other folds to avoid
18226   // codegen regressions.
18227   if (BOpcode != ISD::AND && BOpcode != ISD::OR && BOpcode != ISD::XOR)
18228     return SDValue();
18229
18230   // We need at least one concatenation operation of a binop operand to make
18231   // this transform worthwhile. The concat must double the input vector sizes.
18232   auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
18233     if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
18234       return V.getOperand(ConcatOpNum);
18235     return SDValue();
18236   };
18237   SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
18238   SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
18239
18240   if (SubVecL || SubVecR) {
18241     // If a binop operand was not the result of a concat, we must extract a
18242     // half-sized operand for our new narrow binop:
18243     // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
18244     // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
18245     // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
18246     SDLoc DL(Extract);
18247     SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
18248     SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
18249                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18250                                       BinOp.getOperand(0), IndexC);
18251
18252     SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
18253                         : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
18254                                       BinOp.getOperand(1), IndexC);
18255
18256     SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
18257     return DAG.getBitcast(VT, NarrowBinOp);
18258   }
18259
18260   return SDValue();
18261 }
18262
18263 /// If we are extracting a subvector from a wide vector load, convert to a
18264 /// narrow load to eliminate the extraction:
18265 /// (extract_subvector (load wide vector)) --> (load narrow vector)
18266 static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
18267   // TODO: Add support for big-endian. The offset calculation must be adjusted.
18268   if (DAG.getDataLayout().isBigEndian())
18269     return SDValue();
18270
18271   auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
18272   auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
18273   if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
18274     return SDValue();
18275
18276   // Allow targets to opt-out.
18277   EVT VT = Extract->getValueType(0);
18278   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18279   if (!TLI.shouldReduceLoadWidth(Ld, Ld->getExtensionType(), VT))
18280     return SDValue();
18281
18282   // The narrow load will be offset from the base address of the old load if
18283   // we are extracting from something besides index 0 (little-endian).
18284   SDLoc DL(Extract);
18285   SDValue BaseAddr = Ld->getOperand(1);
18286   unsigned Offset = ExtIdx->getZExtValue() * VT.getScalarType().getStoreSize();
18287
18288   // TODO: Use "BaseIndexOffset" to make this more effective.
18289   SDValue NewAddr = DAG.getMemBasePlusOffset(BaseAddr, Offset, DL);
18290   MachineFunction &MF = DAG.getMachineFunction();
18291   MachineMemOperand *MMO = MF.getMachineMemOperand(Ld->getMemOperand(), Offset,
18292                                                    VT.getStoreSize());
18293   SDValue NewLd = DAG.getLoad(VT, DL, Ld->getChain(), NewAddr, MMO);
18294   DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
18295   return NewLd;
18296 }
18297
18298 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
18299   EVT NVT = N->getValueType(0);
18300   SDValue V = N->getOperand(0);
18301
18302   // Extract from UNDEF is UNDEF.
18303   if (V.isUndef())
18304     return DAG.getUNDEF(NVT);
18305
18306   if (TLI.isOperationLegalOrCustomOrPromote(ISD::LOAD, NVT))
18307     if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
18308       return NarrowLoad;
18309
18310   // Combine an extract of an extract into a single extract_subvector.
18311   // ext (ext X, C), 0 --> ext X, C
18312   SDValue Index = N->getOperand(1);
18313   if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
18314       V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
18315     if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
18316                                     V.getConstantOperandVal(1)) &&
18317         TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
18318       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
18319                          V.getOperand(1));
18320     }
18321   }
18322
18323   // Try to move vector bitcast after extract_subv by scaling extraction index:
18324   // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
18325   if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
18326       V.getOperand(0).getValueType().isVector()) {
18327     SDValue SrcOp = V.getOperand(0);
18328     EVT SrcVT = SrcOp.getValueType();
18329     unsigned SrcNumElts = SrcVT.getVectorNumElements();
18330     unsigned DestNumElts = V.getValueType().getVectorNumElements();
18331     if ((SrcNumElts % DestNumElts) == 0) {
18332       unsigned SrcDestRatio = SrcNumElts / DestNumElts;
18333       unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
18334       EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
18335                                       NewExtNumElts);
18336       if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
18337         unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
18338         SDLoc DL(N);
18339         SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
18340         SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
18341                                          V.getOperand(0), NewIndex);
18342         return DAG.getBitcast(NVT, NewExtract);
18343       }
18344     }
18345     // TODO - handle (DestNumElts % SrcNumElts) == 0
18346   }
18347
18348   // Combine:
18349   //    (extract_subvec (concat V1, V2, ...), i)
18350   // Into:
18351   //    Vi if possible
18352   // Only operand 0 is checked as 'concat' assumes all inputs of the same
18353   // type.
18354   if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
18355       V.getOperand(0).getValueType() == NVT) {
18356     unsigned Idx = N->getConstantOperandVal(1);
18357     unsigned NumElems = NVT.getVectorNumElements();
18358     assert((Idx % NumElems) == 0 &&
18359            "IDX in concat is not a multiple of the result vector length.");
18360     return V->getOperand(Idx / NumElems);
18361   }
18362
18363   V = peekThroughBitcasts(V);
18364
18365   // If the input is a build vector. Try to make a smaller build vector.
18366   if (V.getOpcode() == ISD::BUILD_VECTOR) {
18367     if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
18368       EVT InVT = V.getValueType();
18369       unsigned ExtractSize = NVT.getSizeInBits();
18370       unsigned EltSize = InVT.getScalarSizeInBits();
18371       // Only do this if we won't split any elements.
18372       if (ExtractSize % EltSize == 0) {
18373         unsigned NumElems = ExtractSize / EltSize;
18374         EVT EltVT = InVT.getVectorElementType();
18375         EVT ExtractVT = NumElems == 1 ? EltVT
18376                                       : EVT::getVectorVT(*DAG.getContext(),
18377                                                          EltVT, NumElems);
18378         if ((Level < AfterLegalizeDAG ||
18379              (NumElems == 1 ||
18380               TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
18381             (!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
18382           unsigned IdxVal = IdxC->getZExtValue();
18383           IdxVal *= NVT.getScalarSizeInBits();
18384           IdxVal /= EltSize;
18385
18386           if (NumElems == 1) {
18387             SDValue Src = V->getOperand(IdxVal);
18388             if (EltVT != Src.getValueType())
18389               Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
18390             return DAG.getBitcast(NVT, Src);
18391           }
18392
18393           // Extract the pieces from the original build_vector.
18394           SDValue BuildVec = DAG.getBuildVector(
18395               ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
18396           return DAG.getBitcast(NVT, BuildVec);
18397         }
18398       }
18399     }
18400   }
18401
18402   if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
18403     // Handle only simple case where vector being inserted and vector
18404     // being extracted are of same size.
18405     EVT SmallVT = V.getOperand(1).getValueType();
18406     if (!NVT.bitsEq(SmallVT))
18407       return SDValue();
18408
18409     // Only handle cases where both indexes are constants.
18410     auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
18411     auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
18412     if (InsIdx && ExtIdx) {
18413       // Combine:
18414       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
18415       // Into:
18416       //    indices are equal or bit offsets are equal => V1
18417       //    otherwise => (extract_subvec V1, ExtIdx)
18418       if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
18419           ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
18420         return DAG.getBitcast(NVT, V.getOperand(1));
18421       return DAG.getNode(
18422           ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
18423           DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
18424           Index);
18425     }
18426   }
18427
18428   if (SDValue NarrowBOp = narrowExtractedVectorBinOp(N, DAG))
18429     return NarrowBOp;
18430
18431   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
18432     return SDValue(N, 0);
18433
18434   return SDValue();
18435 }
18436
18437 /// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
18438 /// followed by concatenation. Narrow vector ops may have better performance
18439 /// than wide ops, and this can unlock further narrowing of other vector ops.
18440 /// Targets can invert this transform later if it is not profitable.
18441 static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
18442                                          SelectionDAG &DAG) {
18443   SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
18444   if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
18445       N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
18446       !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
18447     return SDValue();
18448
18449   // Split the wide shuffle mask into halves. Any mask element that is accessing
18450   // operand 1 is offset down to account for narrowing of the vectors.
18451   ArrayRef<int> Mask = Shuf->getMask();
18452   EVT VT = Shuf->getValueType(0);
18453   unsigned NumElts = VT.getVectorNumElements();
18454   unsigned HalfNumElts = NumElts / 2;
18455   SmallVector<int, 16> Mask0(HalfNumElts, -1);
18456   SmallVector<int, 16> Mask1(HalfNumElts, -1);
18457   for (unsigned i = 0; i != NumElts; ++i) {
18458     if (Mask[i] == -1)
18459       continue;
18460     int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
18461     if (i < HalfNumElts)
18462       Mask0[i] = M;
18463     else
18464       Mask1[i - HalfNumElts] = M;
18465   }
18466
18467   // Ask the target if this is a valid transform.
18468   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
18469   EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
18470                                 HalfNumElts);
18471   if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
18472       !TLI.isShuffleMaskLegal(Mask1, HalfVT))
18473     return SDValue();
18474
18475   // shuffle (concat X, undef), (concat Y, undef), Mask -->
18476   // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
18477   SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
18478   SDLoc DL(Shuf);
18479   SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
18480   SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
18481   return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
18482 }
18483
18484 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
18485 // or turn a shuffle of a single concat into simpler shuffle then concat.
18486 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
18487   EVT VT = N->getValueType(0);
18488   unsigned NumElts = VT.getVectorNumElements();
18489
18490   SDValue N0 = N->getOperand(0);
18491   SDValue N1 = N->getOperand(1);
18492   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18493   ArrayRef<int> Mask = SVN->getMask();
18494
18495   SmallVector<SDValue, 4> Ops;
18496   EVT ConcatVT = N0.getOperand(0).getValueType();
18497   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
18498   unsigned NumConcats = NumElts / NumElemsPerConcat;
18499
18500   auto IsUndefMaskElt = [](int i) { return i == -1; };
18501
18502   // Special case: shuffle(concat(A,B)) can be more efficiently represented
18503   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
18504   // half vector elements.
18505   if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
18506       llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
18507                    IsUndefMaskElt)) {
18508     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
18509                               N0.getOperand(1),
18510                               Mask.slice(0, NumElemsPerConcat));
18511     N1 = DAG.getUNDEF(ConcatVT);
18512     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
18513   }
18514
18515   // Look at every vector that's inserted. We're looking for exact
18516   // subvector-sized copies from a concatenated vector
18517   for (unsigned I = 0; I != NumConcats; ++I) {
18518     unsigned Begin = I * NumElemsPerConcat;
18519     ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
18520
18521     // Make sure we're dealing with a copy.
18522     if (llvm::all_of(SubMask, IsUndefMaskElt)) {
18523       Ops.push_back(DAG.getUNDEF(ConcatVT));
18524       continue;
18525     }
18526
18527     int OpIdx = -1;
18528     for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
18529       if (IsUndefMaskElt(SubMask[i]))
18530         continue;
18531       if ((SubMask[i] % (int)NumElemsPerConcat) != i)
18532         return SDValue();
18533       int EltOpIdx = SubMask[i] / NumElemsPerConcat;
18534       if (0 <= OpIdx && EltOpIdx != OpIdx)
18535         return SDValue();
18536       OpIdx = EltOpIdx;
18537     }
18538     assert(0 <= OpIdx && "Unknown concat_vectors op");
18539
18540     if (OpIdx < (int)N0.getNumOperands())
18541       Ops.push_back(N0.getOperand(OpIdx));
18542     else
18543       Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
18544   }
18545
18546   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
18547 }
18548
18549 // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
18550 // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
18551 //
18552 // SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
18553 // a simplification in some sense, but it isn't appropriate in general: some
18554 // BUILD_VECTORs are substantially cheaper than others. The general case
18555 // of a BUILD_VECTOR requires inserting each element individually (or
18556 // performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
18557 // all constants is a single constant pool load.  A BUILD_VECTOR where each
18558 // element is identical is a splat.  A BUILD_VECTOR where most of the operands
18559 // are undef lowers to a small number of element insertions.
18560 //
18561 // To deal with this, we currently use a bunch of mostly arbitrary heuristics.
18562 // We don't fold shuffles where one side is a non-zero constant, and we don't
18563 // fold shuffles if the resulting (non-splat) BUILD_VECTOR would have duplicate
18564 // non-constant operands. This seems to work out reasonably well in practice.
18565 static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
18566                                        SelectionDAG &DAG,
18567                                        const TargetLowering &TLI) {
18568   EVT VT = SVN->getValueType(0);
18569   unsigned NumElts = VT.getVectorNumElements();
18570   SDValue N0 = SVN->getOperand(0);
18571   SDValue N1 = SVN->getOperand(1);
18572
18573   if (!N0->hasOneUse())
18574     return SDValue();
18575
18576   // If only one of N1,N2 is constant, bail out if it is not ALL_ZEROS as
18577   // discussed above.
18578   if (!N1.isUndef()) {
18579     if (!N1->hasOneUse())
18580       return SDValue();
18581
18582     bool N0AnyConst = isAnyConstantBuildVector(N0);
18583     bool N1AnyConst = isAnyConstantBuildVector(N1);
18584     if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
18585       return SDValue();
18586     if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
18587       return SDValue();
18588   }
18589
18590   // If both inputs are splats of the same value then we can safely merge this
18591   // to a single BUILD_VECTOR with undef elements based on the shuffle mask.
18592   bool IsSplat = false;
18593   auto *BV0 = dyn_cast<BuildVectorSDNode>(N0);
18594   auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
18595   if (BV0 && BV1)
18596     if (SDValue Splat0 = BV0->getSplatValue())
18597       IsSplat = (Splat0 == BV1->getSplatValue());
18598
18599   SmallVector<SDValue, 8> Ops;
18600   SmallSet<SDValue, 16> DuplicateOps;
18601   for (int M : SVN->getMask()) {
18602     SDValue Op = DAG.getUNDEF(VT.getScalarType());
18603     if (M >= 0) {
18604       int Idx = M < (int)NumElts ? M : M - NumElts;
18605       SDValue &S = (M < (int)NumElts ? N0 : N1);
18606       if (S.getOpcode() == ISD::BUILD_VECTOR) {
18607         Op = S.getOperand(Idx);
18608       } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
18609         SDValue Op0 = S.getOperand(0);
18610         Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
18611       } else {
18612         // Operand can't be combined - bail out.
18613         return SDValue();
18614       }
18615     }
18616
18617     // Don't duplicate a non-constant BUILD_VECTOR operand unless we're
18618     // generating a splat; semantically, this is fine, but it's likely to
18619     // generate low-quality code if the target can't reconstruct an appropriate
18620     // shuffle.
18621     if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
18622       if (!IsSplat && !DuplicateOps.insert(Op).second)
18623         return SDValue();
18624
18625     Ops.push_back(Op);
18626   }
18627
18628   // BUILD_VECTOR requires all inputs to be of the same type, find the
18629   // maximum type and extend them all.
18630   EVT SVT = VT.getScalarType();
18631   if (SVT.isInteger())
18632     for (SDValue &Op : Ops)
18633       SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
18634   if (SVT != VT.getScalarType())
18635     for (SDValue &Op : Ops)
18636       Op = TLI.isZExtFree(Op.getValueType(), SVT)
18637                ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
18638                : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
18639   return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
18640 }
18641
18642 // Match shuffles that can be converted to any_vector_extend_in_reg.
18643 // This is often generated during legalization.
18644 // e.g. v4i32 <0,u,1,u> -> (v2i64 any_vector_extend_in_reg(v4i32 src))
18645 // TODO Add support for ZERO_EXTEND_VECTOR_INREG when we have a test case.
18646 static SDValue combineShuffleToVectorExtend(ShuffleVectorSDNode *SVN,
18647                                             SelectionDAG &DAG,
18648                                             const TargetLowering &TLI,
18649                                             bool LegalOperations) {
18650   EVT VT = SVN->getValueType(0);
18651   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18652
18653   // TODO Add support for big-endian when we have a test case.
18654   if (!VT.isInteger() || IsBigEndian)
18655     return SDValue();
18656
18657   unsigned NumElts = VT.getVectorNumElements();
18658   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18659   ArrayRef<int> Mask = SVN->getMask();
18660   SDValue N0 = SVN->getOperand(0);
18661
18662   // shuffle<0,-1,1,-1> == (v2i64 anyextend_vector_inreg(v4i32))
18663   auto isAnyExtend = [&Mask, &NumElts](unsigned Scale) {
18664     for (unsigned i = 0; i != NumElts; ++i) {
18665       if (Mask[i] < 0)
18666         continue;
18667       if ((i % Scale) == 0 && Mask[i] == (int)(i / Scale))
18668         continue;
18669       return false;
18670     }
18671     return true;
18672   };
18673
18674   // Attempt to match a '*_extend_vector_inreg' shuffle, we just search for
18675   // power-of-2 extensions as they are the most likely.
18676   for (unsigned Scale = 2; Scale < NumElts; Scale *= 2) {
18677     // Check for non power of 2 vector sizes
18678     if (NumElts % Scale != 0)
18679       continue;
18680     if (!isAnyExtend(Scale))
18681       continue;
18682
18683     EVT OutSVT = EVT::getIntegerVT(*DAG.getContext(), EltSizeInBits * Scale);
18684     EVT OutVT = EVT::getVectorVT(*DAG.getContext(), OutSVT, NumElts / Scale);
18685     // Never create an illegal type. Only create unsupported operations if we
18686     // are pre-legalization.
18687     if (TLI.isTypeLegal(OutVT))
18688       if (!LegalOperations ||
18689           TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND_VECTOR_INREG, OutVT))
18690         return DAG.getBitcast(VT,
18691                               DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG,
18692                                           SDLoc(SVN), OutVT, N0));
18693   }
18694
18695   return SDValue();
18696 }
18697
18698 // Detect 'truncate_vector_inreg' style shuffles that pack the lower parts of
18699 // each source element of a large type into the lowest elements of a smaller
18700 // destination type. This is often generated during legalization.
18701 // If the source node itself was a '*_extend_vector_inreg' node then we should
18702 // then be able to remove it.
18703 static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
18704                                         SelectionDAG &DAG) {
18705   EVT VT = SVN->getValueType(0);
18706   bool IsBigEndian = DAG.getDataLayout().isBigEndian();
18707
18708   // TODO Add support for big-endian when we have a test case.
18709   if (!VT.isInteger() || IsBigEndian)
18710     return SDValue();
18711
18712   SDValue N0 = peekThroughBitcasts(SVN->getOperand(0));
18713
18714   unsigned Opcode = N0.getOpcode();
18715   if (Opcode != ISD::ANY_EXTEND_VECTOR_INREG &&
18716       Opcode != ISD::SIGN_EXTEND_VECTOR_INREG &&
18717       Opcode != ISD::ZERO_EXTEND_VECTOR_INREG)
18718     return SDValue();
18719
18720   SDValue N00 = N0.getOperand(0);
18721   ArrayRef<int> Mask = SVN->getMask();
18722   unsigned NumElts = VT.getVectorNumElements();
18723   unsigned EltSizeInBits = VT.getScalarSizeInBits();
18724   unsigned ExtSrcSizeInBits = N00.getScalarValueSizeInBits();
18725   unsigned ExtDstSizeInBits = N0.getScalarValueSizeInBits();
18726
18727   if (ExtDstSizeInBits % ExtSrcSizeInBits != 0)
18728     return SDValue();
18729   unsigned ExtScale = ExtDstSizeInBits / ExtSrcSizeInBits;
18730
18731   // (v4i32 truncate_vector_inreg(v2i64)) == shuffle<0,2-1,-1>
18732   // (v8i16 truncate_vector_inreg(v4i32)) == shuffle<0,2,4,6,-1,-1,-1,-1>
18733   // (v8i16 truncate_vector_inreg(v2i64)) == shuffle<0,4,-1,-1,-1,-1,-1,-1>
18734   auto isTruncate = [&Mask, &NumElts](unsigned Scale) {
18735     for (unsigned i = 0; i != NumElts; ++i) {
18736       if (Mask[i] < 0)
18737         continue;
18738       if ((i * Scale) < NumElts && Mask[i] == (int)(i * Scale))
18739         continue;
18740       return false;
18741     }
18742     return true;
18743   };
18744
18745   // At the moment we just handle the case where we've truncated back to the
18746   // same size as before the extension.
18747   // TODO: handle more extension/truncation cases as cases arise.
18748   if (EltSizeInBits != ExtSrcSizeInBits)
18749     return SDValue();
18750
18751   // We can remove *extend_vector_inreg only if the truncation happens at
18752   // the same scale as the extension.
18753   if (isTruncate(ExtScale))
18754     return DAG.getBitcast(VT, N00);
18755
18756   return SDValue();
18757 }
18758
18759 // Combine shuffles of splat-shuffles of the form:
18760 // shuffle (shuffle V, undef, splat-mask), undef, M
18761 // If splat-mask contains undef elements, we need to be careful about
18762 // introducing undef's in the folded mask which are not the result of composing
18763 // the masks of the shuffles.
18764 static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
18765                                         SelectionDAG &DAG) {
18766   if (!Shuf->getOperand(1).isUndef())
18767     return SDValue();
18768   auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18769   if (!Splat || !Splat->isSplat())
18770     return SDValue();
18771
18772   ArrayRef<int> ShufMask = Shuf->getMask();
18773   ArrayRef<int> SplatMask = Splat->getMask();
18774   assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
18775
18776   // Prefer simplifying to the splat-shuffle, if possible. This is legal if
18777   // every undef mask element in the splat-shuffle has a corresponding undef
18778   // element in the user-shuffle's mask or if the composition of mask elements
18779   // would result in undef.
18780   // Examples for (shuffle (shuffle v, undef, SplatMask), undef, UserMask):
18781   // * UserMask=[0,2,u,u], SplatMask=[2,u,2,u] -> [2,2,u,u]
18782   //   In this case it is not legal to simplify to the splat-shuffle because we
18783   //   may be exposing the users of the shuffle an undef element at index 1
18784   //   which was not there before the combine.
18785   // * UserMask=[0,u,2,u], SplatMask=[2,u,2,u] -> [2,u,2,u]
18786   //   In this case the composition of masks yields SplatMask, so it's ok to
18787   //   simplify to the splat-shuffle.
18788   // * UserMask=[3,u,2,u], SplatMask=[2,u,2,u] -> [u,u,2,u]
18789   //   In this case the composed mask includes all undef elements of SplatMask
18790   //   and in addition sets element zero to undef. It is safe to simplify to
18791   //   the splat-shuffle.
18792   auto CanSimplifyToExistingSplat = [](ArrayRef<int> UserMask,
18793                                        ArrayRef<int> SplatMask) {
18794     for (unsigned i = 0, e = UserMask.size(); i != e; ++i)
18795       if (UserMask[i] != -1 && SplatMask[i] == -1 &&
18796           SplatMask[UserMask[i]] != -1)
18797         return false;
18798     return true;
18799   };
18800   if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
18801     return Shuf->getOperand(0);
18802
18803   // Create a new shuffle with a mask that is composed of the two shuffles'
18804   // masks.
18805   SmallVector<int, 32> NewMask;
18806   for (int Idx : ShufMask)
18807     NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
18808
18809   return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
18810                               Splat->getOperand(0), Splat->getOperand(1),
18811                               NewMask);
18812 }
18813
18814 /// If the shuffle mask is taking exactly one element from the first vector
18815 /// operand and passing through all other elements from the second vector
18816 /// operand, return the index of the mask element that is choosing an element
18817 /// from the first operand. Otherwise, return -1.
18818 static int getShuffleMaskIndexOfOneElementFromOp0IntoOp1(ArrayRef<int> Mask) {
18819   int MaskSize = Mask.size();
18820   int EltFromOp0 = -1;
18821   // TODO: This does not match if there are undef elements in the shuffle mask.
18822   // Should we ignore undefs in the shuffle mask instead? The trade-off is
18823   // removing an instruction (a shuffle), but losing the knowledge that some
18824   // vector lanes are not needed.
18825   for (int i = 0; i != MaskSize; ++i) {
18826     if (Mask[i] >= 0 && Mask[i] < MaskSize) {
18827       // We're looking for a shuffle of exactly one element from operand 0.
18828       if (EltFromOp0 != -1)
18829         return -1;
18830       EltFromOp0 = i;
18831     } else if (Mask[i] != i + MaskSize) {
18832       // Nothing from operand 1 can change lanes.
18833       return -1;
18834     }
18835   }
18836   return EltFromOp0;
18837 }
18838
18839 /// If a shuffle inserts exactly one element from a source vector operand into
18840 /// another vector operand and we can access the specified element as a scalar,
18841 /// then we can eliminate the shuffle.
18842 static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
18843                                       SelectionDAG &DAG) {
18844   // First, check if we are taking one element of a vector and shuffling that
18845   // element into another vector.
18846   ArrayRef<int> Mask = Shuf->getMask();
18847   SmallVector<int, 16> CommutedMask(Mask.begin(), Mask.end());
18848   SDValue Op0 = Shuf->getOperand(0);
18849   SDValue Op1 = Shuf->getOperand(1);
18850   int ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(Mask);
18851   if (ShufOp0Index == -1) {
18852     // Commute mask and check again.
18853     ShuffleVectorSDNode::commuteMask(CommutedMask);
18854     ShufOp0Index = getShuffleMaskIndexOfOneElementFromOp0IntoOp1(CommutedMask);
18855     if (ShufOp0Index == -1)
18856       return SDValue();
18857     // Commute operands to match the commuted shuffle mask.
18858     std::swap(Op0, Op1);
18859     Mask = CommutedMask;
18860   }
18861
18862   // The shuffle inserts exactly one element from operand 0 into operand 1.
18863   // Now see if we can access that element as a scalar via a real insert element
18864   // instruction.
18865   // TODO: We can try harder to locate the element as a scalar. Examples: it
18866   // could be an operand of SCALAR_TO_VECTOR, BUILD_VECTOR, or a constant.
18867   assert(Mask[ShufOp0Index] >= 0 && Mask[ShufOp0Index] < (int)Mask.size() &&
18868          "Shuffle mask value must be from operand 0");
18869   if (Op0.getOpcode() != ISD::INSERT_VECTOR_ELT)
18870     return SDValue();
18871
18872   auto *InsIndexC = dyn_cast<ConstantSDNode>(Op0.getOperand(2));
18873   if (!InsIndexC || InsIndexC->getSExtValue() != Mask[ShufOp0Index])
18874     return SDValue();
18875
18876   // There's an existing insertelement with constant insertion index, so we
18877   // don't need to check the legality/profitability of a replacement operation
18878   // that differs at most in the constant value. The target should be able to
18879   // lower any of those in a similar way. If not, legalization will expand this
18880   // to a scalar-to-vector plus shuffle.
18881   //
18882   // Note that the shuffle may move the scalar from the position that the insert
18883   // element used. Therefore, our new insert element occurs at the shuffle's
18884   // mask index value, not the insert's index value.
18885   // shuffle (insertelt v1, x, C), v2, mask --> insertelt v2, x, C'
18886   SDValue NewInsIndex = DAG.getConstant(ShufOp0Index, SDLoc(Shuf),
18887                                         Op0.getOperand(2).getValueType());
18888   return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Shuf), Op0.getValueType(),
18889                      Op1, Op0.getOperand(1), NewInsIndex);
18890 }
18891
18892 /// If we have a unary shuffle of a shuffle, see if it can be folded away
18893 /// completely. This has the potential to lose undef knowledge because the first
18894 /// shuffle may not have an undef mask element where the second one does. So
18895 /// only call this after doing simplifications based on demanded elements.
18896 static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
18897   // shuf (shuf0 X, Y, Mask0), undef, Mask
18898   auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
18899   if (!Shuf0 || !Shuf->getOperand(1).isUndef())
18900     return SDValue();
18901
18902   ArrayRef<int> Mask = Shuf->getMask();
18903   ArrayRef<int> Mask0 = Shuf0->getMask();
18904   for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
18905     // Ignore undef elements.
18906     if (Mask[i] == -1)
18907       continue;
18908     assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
18909
18910     // Is the element of the shuffle operand chosen by this shuffle the same as
18911     // the element chosen by the shuffle operand itself?
18912     if (Mask0[Mask[i]] != Mask0[i])
18913       return SDValue();
18914   }
18915   // Every element of this shuffle is identical to the result of the previous
18916   // shuffle, so we can replace this value.
18917   return Shuf->getOperand(0);
18918 }
18919
18920 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
        // Visit a VECTOR_SHUFFLE node and try a fixed sequence of
        // canonicalizations and folds. Returns the replacement value produced by
        // the first transform that applies, SDValue(N, 0) when
        // SimplifyDemandedVectorElts reports a change, or an empty SDValue when
        // nothing applies. The transforms are order-dependent; see the note ahead
        // of the simplifyShuffleOfShuffle call below.
        // NOTE(review): Level, LegalOperations, TLI and DAG are not declared in
        // this function; presumably DAGCombiner members - confirm.
18921   EVT VT = N->getValueType(0);
18922   unsigned NumElts = VT.getVectorNumElements();
18923 
18924   SDValue N0 = N->getOperand(0);
18925   SDValue N1 = N->getOperand(1);
18926 
18927   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
18928 
18929   // Canonicalize shuffle undef, undef -> undef
18930   if (N0.isUndef() && N1.isUndef())
18931     return DAG.getUNDEF(VT);
18932 
18933   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
18934 
18935   // Canonicalize shuffle v, v -> v, undef
18936   if (N0 == N1) {
18937     SmallVector<int, 8> NewMask;
18938     for (unsigned i = 0; i != NumElts; ++i) {
18939       int Idx = SVN->getMaskElt(i);
              // Indices >= NumElts select from the second operand; since both
              // operands are the same value, rebase them onto the first one.
18940       if (Idx >= (int)NumElts) Idx -= NumElts;
18941       NewMask.push_back(Idx);
18942     }
18943     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
18944   }
18945 
18946   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
18947   if (N0.isUndef())
18948     return DAG.getCommutedVectorShuffle(*SVN);
18949 
18950   // Remove references to rhs if it is undef
18951   if (N1.isUndef()) {
18952     bool Changed = false;
18953     SmallVector<int, 8> NewMask;
18954     for (unsigned i = 0; i != NumElts; ++i) {
18955       int Idx = SVN->getMaskElt(i);
18956       if (Idx >= (int)NumElts) {
18957         Idx = -1;
18958         Changed = true;
18959       }
18960       NewMask.push_back(Idx);
18961     }
            // Only build a new node when at least one mask element was rewritten.
18962     if (Changed)
18963       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
18964   }
18965 
18966   if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
18967     return InsElt;
18968 
18969   // A shuffle of a single vector that is a splatted value can always be folded.
18970   if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
18971     return V;
18972 
18973   // If it is a splat, check if the argument vector is another splat or a
18974   // build_vector.
          // The SplatIndex < NumElts test restricts this to splats whose source
          // element comes from N0.
18975   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
18976     int SplatIndex = SVN->getSplatIndex();
18977     if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
18978         TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
18979       // splat (vector_bo L, R), Index -->
18980       // splat (scalar_bo (extelt L, Index), (extelt R, Index))
18981       SDValue L = N0.getOperand(0), R = N0.getOperand(1);
18982       SDLoc DL(N);
18983       EVT EltVT = VT.getScalarType();
18984       SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
18985       SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
18986       SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
              // The scalar operation reuses the flags of the vector operation.
18987       SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
18988                                   N0.getNode()->getFlags());
18989       SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
              // All-zero mask: every result lane reads lane 0 of Insert.
18990       SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
18991       return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
18992     }
18993 
18994     // If this is a bit convert that changes the element type of the vector but
18995     // not the number of vector elements, look through it.  Be careful not to
18996     // look though conversions that change things like v4f32 to v2f64.
18997     SDNode *V = N0.getNode();
18998     if (V->getOpcode() == ISD::BITCAST) {
18999       SDValue ConvInput = V->getOperand(0);
19000       if (ConvInput.getValueType().isVector() &&
19001           ConvInput.getValueType().getVectorNumElements() == NumElts)
19002         V = ConvInput.getNode();
19003     }
19004 
19005     if (V->getOpcode() == ISD::BUILD_VECTOR) {
19006       assert(V->getNumOperands() == NumElts &&
19007              "BUILD_VECTOR has wrong number of operands");
              // Base becomes the first non-undef operand, if there is one.
19008       SDValue Base;
19009       bool AllSame = true;
19010       for (unsigned i = 0; i != NumElts; ++i) {
19011         if (!V->getOperand(i).isUndef()) {
19012           Base = V->getOperand(i);
19013           break;
19014         }
19015       }
19016       // Splat of <u, u, u, u>, return <u, u, u, u>
19017       if (!Base.getNode())
19018         return N0;
              // Any operand that differs from Base (including an undef one) clears
              // AllSame.
19019       for (unsigned i = 0; i != NumElts; ++i) {
19020         if (V->getOperand(i) != Base) {
19021           AllSame = false;
19022           break;
19023         }
19024       }
19025       // Splat of <x, x, x, x>, return <x, x, x, x>
19026       if (AllSame)
19027         return N0;
19028 
19029       // Canonicalize any other splat as a build_vector.
19030       SDValue Splatted = V->getOperand(SplatIndex);
19031       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
19032       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
19033 
19034       // We may have jumped through bitcasts, so the type of the
19035       // BUILD_VECTOR may not match the type of the shuffle.
19036       if (V->getValueType(0) != VT)
19037         NewBV = DAG.getBitcast(VT, NewBV);
19038       return NewBV;
19039     }
19040   }
19041 
19042   // Simplify source operands based on shuffle mask.
19043   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19044     return SDValue(N, 0);
19045 
19046   // This is intentionally placed after demanded elements simplification because
19047   // it could eliminate knowledge of undef elements created by this shuffle.
19048   if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
19049     return ShufOp;
19050 
19051   // Match shuffles that can be converted to any_vector_extend_in_reg.
19052   if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
19053     return V;
19054 
19055   // Combine "truncate_vector_in_reg" style shuffles.
19056   if (SDValue V = combineTruncationShuffle(SVN, DAG))
19057     return V;
19058 
          // Shuffle of CONCAT_VECTORS: N0 must be a concat, and N1 either undef or
          // a concat whose first operand has the same type as N0's first operand.
19059   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
19060       Level < AfterLegalizeVectorOps &&
19061       (N1.isUndef() ||
19062       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
19063        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
19064     if (SDValue V = partitionShuffleOfConcats(N, DAG))
19065       return V;
19066   }
19067 
19068   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
19069   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
19070   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT))
19071     if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
19072       return Res;
19073 
19074   // If this shuffle only has a single input that is a bitcasted shuffle,
19075   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
19076   // back to their original types.
19077   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
19078       N1.isUndef() && Level < AfterLegalizeVectorOps &&
19079       TLI.isTypeLegal(VT)) {
            // Rewrites a mask for elements that are Scale times narrower: each
            // element M becomes the Scale consecutive elements Scale*M+0 ..
            // Scale*M+(Scale-1); negative (undef) elements become Scale copies
            // of -1.
19080     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
19081       if (Scale == 1)
19082         return SmallVector<int, 8>(Mask.begin(), Mask.end());
19083 
19084       SmallVector<int, 8> NewMask;
19085       for (int M : Mask)
19086         for (int s = 0; s != Scale; ++s)
19087           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
19088       return NewMask;
19089     };
19090 
19091     SDValue BC0 = peekThroughOneUseBitcasts(N0);
19092     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
19093       EVT SVT = VT.getScalarType();
19094       EVT InnerVT = BC0->getValueType(0);
19095       EVT InnerSVT = InnerVT.getScalarType();
19096 
19097       // Determine which shuffle works with the smaller scalar type.
19098       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
19099       EVT ScaleSVT = ScaleVT.getScalarType();
19100 
              // Both scalar sizes must be whole multiples of the smaller one so the
              // masks can be rescaled exactly.
19101       if (TLI.isTypeLegal(ScaleVT) &&
19102           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
19103           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
19104         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19105         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
19106 
19107         // Scale the shuffle masks to the smaller scalar type.
19108         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
19109         SmallVector<int, 8> InnerMask =
19110             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
19111         SmallVector<int, 8> OuterMask =
19112             ScaleShuffleMask(SVN->getMask(), OuterScale);
19113 
19114         // Merge the shuffle masks.
19115         SmallVector<int, 8> NewMask;
19116         for (int M : OuterMask)
19117           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
19118 
19119         // Test for shuffle mask legality over both commutations.
19120         SDValue SV0 = BC0->getOperand(0);
19121         SDValue SV1 = BC0->getOperand(1);
19122         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19123         if (!LegalMask) {
19124           std::swap(SV0, SV1);
19125           ShuffleVectorSDNode::commuteMask(NewMask);
19126           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
19127         }
19128 
19129         if (LegalMask) {
19130           SV0 = DAG.getBitcast(ScaleVT, SV0);
19131           SV1 = DAG.getBitcast(ScaleVT, SV1);
19132           return DAG.getBitcast(
19133               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
19134         }
19135       }
19136     }
19137   }
19138 
19139   // Canonicalize shuffles according to rules:
19140   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
19141   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
19142   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
19143   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
19144       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
19145       TLI.isTypeLegal(VT)) {
19146     // The incoming shuffle must be of the same type as the result of the
19147     // current shuffle.
19148     assert(N1->getOperand(0).getValueType() == VT &&
19149            "Shuffle types don't match");
19150 
19151     SDValue SV0 = N1->getOperand(0);
19152     SDValue SV1 = N1->getOperand(1);
19153     bool HasSameOp0 = N0 == SV0;
19154     bool IsSV1Undef = SV1.isUndef();
19155     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
19156       // Commute the operands of this shuffle so that next rule
19157       // will trigger.
19158       return DAG.getCommutedVectorShuffle(*SVN);
19159   }
19160 
19161   // Try to fold according to rules:
19162   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19163   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19164   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19165   // Don't try to fold shuffles with illegal type.
19166   // Only fold if this shuffle is the only user of the other shuffle.
19167   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
19168       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
19169     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
19170 
19171     // Don't try to fold splats; they're likely to simplify somehow, or they
19172     // might be free.
            // NOTE: this returns from the whole visit, so the
            // foldShuffleOfConcatUndefs attempt at the end of the function is not
            // reached on this path.
19173     if (OtherSV->isSplat())
19174       return SDValue();
19175 
19176     // The incoming shuffle must be of the same type as the result of the
19177     // current shuffle.
19178     assert(OtherSV->getOperand(0).getValueType() == VT &&
19179            "Shuffle types don't match");
19180 
            // SV0/SV1 start out null and are assigned lazily, in the order in which
            // distinct non-undef source vectors are encountered in the loop below.
19181     SDValue SV0, SV1;
19182     SmallVector<int, 4> Mask;
19183     // Compute the combined shuffle mask for a shuffle with SV0 as the first
19184     // operand, and SV1 as the second operand.
19185     for (unsigned i = 0; i != NumElts; ++i) {
19186       int Idx = SVN->getMaskElt(i);
19187       if (Idx < 0) {
19188         // Propagate Undef.
19189         Mask.push_back(Idx);
19190         continue;
19191       }
19192 
19193       SDValue CurrentVec;
19194       if (Idx < (int)NumElts) {
19195         // This shuffle index refers to the inner shuffle N0. Lookup the inner
19196         // shuffle mask to identify which vector is actually referenced.
19197         Idx = OtherSV->getMaskElt(Idx);
19198         if (Idx < 0) {
19199           // Propagate Undef.
19200           Mask.push_back(Idx);
19201           continue;
19202         }
19203 
19204         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
19205                                            : OtherSV->getOperand(1);
19206       } else {
19207         // This shuffle index references an element within N1.
19208         CurrentVec = N1;
19209       }
19210 
19211       // Simple case where 'CurrentVec' is UNDEF.
19212       if (CurrentVec.isUndef()) {
19213         Mask.push_back(-1);
19214         continue;
19215       }
19216 
19217       // Canonicalize the shuffle index. We don't know yet if CurrentVec
19218       // will be the first or second operand of the combined shuffle.
19219       Idx = Idx % NumElts;
19220       if (!SV0.getNode() || SV0 == CurrentVec) {
19221         // Ok. CurrentVec is the left hand side.
19222         // Update the mask accordingly.
19223         SV0 = CurrentVec;
19224         Mask.push_back(Idx);
19225         continue;
19226       }
19227 
19228       // Bail out if we cannot convert the shuffle pair into a single shuffle.
              // (That is, a third distinct source vector would be needed.)
19229       if (SV1.getNode() && SV1 != CurrentVec)
19230         return SDValue();
19231 
19232       // Ok. CurrentVec is the right hand side.
19233       // Update the mask accordingly.
19234       SV1 = CurrentVec;
19235       Mask.push_back(Idx + NumElts);
19236     }
19237 
19238     // Check if all indices in Mask are Undef. In case, propagate Undef.
19239     bool isUndefMask = true;
19240     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
19241       isUndefMask &= Mask[i] < 0;
19242 
19243     if (isUndefMask)
19244       return DAG.getUNDEF(VT);
19245 
            // Operands that were never assigned are filled in with undef.
19246     if (!SV0.getNode())
19247       SV0 = DAG.getUNDEF(VT);
19248     if (!SV1.getNode())
19249       SV1 = DAG.getUNDEF(VT);
19250 
19251     // Avoid introducing shuffles with illegal mask.
19252     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
19253     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
19254     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
19255     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
19256     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
19257     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
19258     return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
19259   }
19260 
19261   if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
19262     return V;
19263 
19264   return SDValue();
19265 }
19266
19267 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
19268   SDValue InVal = N->getOperand(0);
19269   EVT VT = N->getValueType(0);
19270
19271   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
19272   // with a VECTOR_SHUFFLE and possible truncate.
19273   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
19274     SDValue InVec = InVal->getOperand(0);
19275     SDValue EltNo = InVal->getOperand(1);
19276     auto InVecT = InVec.getValueType();
19277     if (ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo)) {
19278       SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
19279       int Elt = C0->getZExtValue();
19280       NewMask[0] = Elt;
19281       // If we have an implict truncate do truncate here as long as it's legal.
19282       // if it's not legal, this should
19283       if (VT.getScalarType() != InVal.getValueType() &&
19284           InVal.getValueType().isScalarInteger() &&
19285           isTypeLegal(VT.getScalarType())) {
19286         SDValue Val =
19287             DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
19288         return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
19289       }
19290       if (VT.getScalarType() == InVecT.getScalarType() &&
19291           VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
19292         SDValue LegalShuffle =
19293           TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
19294                                       DAG.getUNDEF(InVecT), NewMask, DAG);
19295         if (LegalShuffle) {
19296           // If the initial vector is the correct size this shuffle is a
19297           // valid result.
19298           if (VT == InVecT)
19299             return LegalShuffle;
19300           // If not we must truncate the vector.
19301           if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
19302             MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
19303             SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
19304             EVT SubVT =
19305                 EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
19306                                  VT.getVectorNumElements());
19307             return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
19308                                LegalShuffle, ZeroIdx);
19309           }
19310         }
19311       }
19312     }
19313   }
19314
19315   return SDValue();
19316 }
19317
19318 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
19319   EVT VT = N->getValueType(0);
19320   SDValue N0 = N->getOperand(0);
19321   SDValue N1 = N->getOperand(1);
19322   SDValue N2 = N->getOperand(2);
19323
19324   // If inserting an UNDEF, just return the original vector.
19325   if (N1.isUndef())
19326     return N0;
19327
19328   // If this is an insert of an extracted vector into an undef vector, we can
19329   // just use the input to the extract.
19330   if (N0.isUndef() && N1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19331       N1.getOperand(1) == N2 && N1.getOperand(0).getValueType() == VT)
19332     return N1.getOperand(0);
19333
19334   // If we are inserting a bitcast value into an undef, with the same
19335   // number of elements, just use the bitcast input of the extract.
19336   // i.e. INSERT_SUBVECTOR UNDEF (BITCAST N1) N2 ->
19337   //        BITCAST (INSERT_SUBVECTOR UNDEF N1 N2)
19338   if (N0.isUndef() && N1.getOpcode() == ISD::BITCAST &&
19339       N1.getOperand(0).getOpcode() == ISD::EXTRACT_SUBVECTOR &&
19340       N1.getOperand(0).getOperand(1) == N2 &&
19341       N1.getOperand(0).getOperand(0).getValueType().getVectorNumElements() ==
19342           VT.getVectorNumElements() &&
19343       N1.getOperand(0).getOperand(0).getValueType().getSizeInBits() ==
19344           VT.getSizeInBits()) {
19345     return DAG.getBitcast(VT, N1.getOperand(0).getOperand(0));
19346   }
19347
19348   // If both N1 and N2 are bitcast values on which insert_subvector
19349   // would makes sense, pull the bitcast through.
19350   // i.e. INSERT_SUBVECTOR (BITCAST N0) (BITCAST N1) N2 ->
19351   //        BITCAST (INSERT_SUBVECTOR N0 N1 N2)
19352   if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST) {
19353     SDValue CN0 = N0.getOperand(0);
19354     SDValue CN1 = N1.getOperand(0);
19355     EVT CN0VT = CN0.getValueType();
19356     EVT CN1VT = CN1.getValueType();
19357     if (CN0VT.isVector() && CN1VT.isVector() &&
19358         CN0VT.getVectorElementType() == CN1VT.getVectorElementType() &&
19359         CN0VT.getVectorNumElements() == VT.getVectorNumElements()) {
19360       SDValue NewINSERT = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N),
19361                                       CN0.getValueType(), CN0, CN1, N2);
19362       return DAG.getBitcast(VT, NewINSERT);
19363     }
19364   }
19365
19366   // Combine INSERT_SUBVECTORs where we are inserting to the same index.
19367   // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
19368   // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
19369   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
19370       N0.getOperand(1).getValueType() == N1.getValueType() &&
19371       N0.getOperand(2) == N2)
19372     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
19373                        N1, N2);
19374
19375   // Eliminate an intermediate insert into an undef vector:
19376   // insert_subvector undef, (insert_subvector undef, X, 0), N2 -->
19377   // insert_subvector undef, X, N2
19378   if (N0.isUndef() && N1.getOpcode() == ISD::INSERT_SUBVECTOR &&
19379       N1.getOperand(0).isUndef() && isNullConstant(N1.getOperand(2)))
19380     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0,
19381                        N1.getOperand(1), N2);
19382
19383   if (!isa<ConstantSDNode>(N2))
19384     return SDValue();
19385
19386   uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
19387
19388   // Push subvector bitcasts to the output, adjusting the index as we go.
19389   // insert_subvector(bitcast(v), bitcast(s), c1)
19390   // -> bitcast(insert_subvector(v, s, c2))
19391   if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
19392       N1.getOpcode() == ISD::BITCAST) {
19393     SDValue N0Src = peekThroughBitcasts(N0);
19394     SDValue N1Src = peekThroughBitcasts(N1);
19395     EVT N0SrcSVT = N0Src.getValueType().getScalarType();
19396     EVT N1SrcSVT = N1Src.getValueType().getScalarType();
19397     if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
19398         N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
19399       EVT NewVT;
19400       SDLoc DL(N);
19401       SDValue NewIdx;
19402       MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
19403       LLVMContext &Ctx = *DAG.getContext();
19404       unsigned NumElts = VT.getVectorNumElements();
19405       unsigned EltSizeInBits = VT.getScalarSizeInBits();
19406       if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
19407         unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
19408         NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
19409         NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
19410       } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
19411         unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
19412         if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
19413           NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
19414           NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
19415         }
19416       }
19417       if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
19418         SDValue Res = DAG.getBitcast(NewVT, N0Src);
19419         Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
19420         return DAG.getBitcast(VT, Res);
19421       }
19422     }
19423   }
19424
19425   // Canonicalize insert_subvector dag nodes.
19426   // Example:
19427   // (insert_subvector (insert_subvector A, Idx0), Idx1)
19428   // -> (insert_subvector (insert_subvector A, Idx1), Idx0)
19429   if (N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.hasOneUse() &&
19430       N1.getValueType() == N0.getOperand(1).getValueType() &&
19431       isa<ConstantSDNode>(N0.getOperand(2))) {
19432     unsigned OtherIdx = N0.getConstantOperandVal(2);
19433     if (InsIdx < OtherIdx) {
19434       // Swap nodes.
19435       SDValue NewOp = DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT,
19436                                   N0.getOperand(0), N1, N2);
19437       AddToWorklist(NewOp.getNode());
19438       return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N0.getNode()),
19439                          VT, NewOp, N0.getOperand(1), N0.getOperand(2));
19440     }
19441   }
19442
19443   // If the input vector is a concatenation, and the insert replaces
19444   // one of the pieces, we can optimize into a single concat_vectors.
19445   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0.hasOneUse() &&
19446       N0.getOperand(0).getValueType() == N1.getValueType()) {
19447     unsigned Factor = N1.getValueType().getVectorNumElements();
19448
19449     SmallVector<SDValue, 8> Ops(N0->op_begin(), N0->op_end());
19450     Ops[cast<ConstantSDNode>(N2)->getZExtValue() / Factor] = N1;
19451
19452     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
19453   }
19454
19455   // Simplify source operands based on insertion.
19456   if (SimplifyDemandedVectorElts(SDValue(N, 0)))
19457     return SDValue(N, 0);
19458
19459   return SDValue();
19460 }
19461
19462 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
19463   SDValue N0 = N->getOperand(0);
19464
19465   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
19466   if (N0->getOpcode() == ISD::FP16_TO_FP)
19467     return N0->getOperand(0);
19468
19469   return SDValue();
19470 }
19471
19472 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
19473   SDValue N0 = N->getOperand(0);
19474
19475   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
19476   if (N0->getOpcode() == ISD::AND) {
19477     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
19478     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
19479       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
19480                          N0.getOperand(0));
19481     }
19482   }
19483
19484   return SDValue();
19485 }
19486
19487 SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
19488   SDValue N0 = N->getOperand(0);
19489   EVT VT = N0.getValueType();
19490   unsigned Opcode = N->getOpcode();
19491
19492   // VECREDUCE over 1-element vector is just an extract.
19493   if (VT.getVectorNumElements() == 1) {
19494     SDLoc dl(N);
19495     SDValue Res = DAG.getNode(
19496         ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
19497         DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
19498     if (Res.getValueType() != N->getValueType(0))
19499       Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
19500     return Res;
19501   }
19502
19503   // On an boolean vector an and/or reduction is the same as a umin/umax
19504   // reduction. Convert them if the latter is legal while the former isn't.
19505   if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
19506     unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
19507         ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
19508     if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
19509         TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
19510         DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
19511       return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
19512   }
19513
19514   return SDValue();
19515 }
19516
19517 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
19518 /// with the destination vector and a zero vector.
19519 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
19520 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
19521 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
19522   assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");
19523
19524   EVT VT = N->getValueType(0);
19525   SDValue LHS = N->getOperand(0);
19526   SDValue RHS = peekThroughBitcasts(N->getOperand(1));
19527   SDLoc DL(N);
19528
19529   // Make sure we're not running after operation legalization where it
19530   // may have custom lowered the vector shuffles.
19531   if (LegalOperations)
19532     return SDValue();
19533
19534   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
19535     return SDValue();
19536
19537   EVT RVT = RHS.getValueType();
19538   unsigned NumElts = RHS.getNumOperands();
19539
19540   // Attempt to create a valid clear mask, splitting the mask into
19541   // sub elements and checking to see if each is
19542   // all zeros or all ones - suitable for shuffle masking.
19543   auto BuildClearMask = [&](int Split) {
19544     int NumSubElts = NumElts * Split;
19545     int NumSubBits = RVT.getScalarSizeInBits() / Split;
19546
19547     SmallVector<int, 8> Indices;
19548     for (int i = 0; i != NumSubElts; ++i) {
19549       int EltIdx = i / Split;
19550       int SubIdx = i % Split;
19551       SDValue Elt = RHS.getOperand(EltIdx);
19552       if (Elt.isUndef()) {
19553         Indices.push_back(-1);
19554         continue;
19555       }
19556
19557       APInt Bits;
19558       if (isa<ConstantSDNode>(Elt))
19559         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
19560       else if (isa<ConstantFPSDNode>(Elt))
19561         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
19562       else
19563         return SDValue();
19564
19565       // Extract the sub element from the constant bit mask.
19566       if (DAG.getDataLayout().isBigEndian()) {
19567         Bits.lshrInPlace((Split - SubIdx - 1) * NumSubBits);
19568       } else {
19569         Bits.lshrInPlace(SubIdx * NumSubBits);
19570       }
19571
19572       if (Split > 1)
19573         Bits = Bits.trunc(NumSubBits);
19574
19575       if (Bits.isAllOnesValue())
19576         Indices.push_back(i);
19577       else if (Bits == 0)
19578         Indices.push_back(i + NumSubElts);
19579       else
19580         return SDValue();
19581     }
19582
19583     // Let's see if the target supports this vector_shuffle.
19584     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
19585     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
19586     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
19587       return SDValue();
19588
19589     SDValue Zero = DAG.getConstant(0, DL, ClearVT);
19590     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
19591                                                    DAG.getBitcast(ClearVT, LHS),
19592                                                    Zero, Indices));
19593   };
19594
19595   // Determine maximum split level (byte level masking).
19596   int MaxSplit = 1;
19597   if (RVT.getScalarSizeInBits() % 8 == 0)
19598     MaxSplit = RVT.getScalarSizeInBits() / 8;
19599
19600   for (int Split = 1; Split <= MaxSplit; ++Split)
19601     if (RVT.getScalarSizeInBits() % Split == 0)
19602       if (SDValue S = BuildClearMask(Split))
19603         return S;
19604
19605   return SDValue();
19606 }
19607
19608 /// If a vector binop is performed on splat values, it may be profitable to
19609 /// extract, scalarize, and insert/splat.
19610 static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
19611   SDValue N0 = N->getOperand(0);
19612   SDValue N1 = N->getOperand(1);
19613   unsigned Opcode = N->getOpcode();
19614   EVT VT = N->getValueType(0);
19615   EVT EltVT = VT.getVectorElementType();
19616   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
19617
19618   // TODO: Remove/replace the extract cost check? If the elements are available
19619   //       as scalars, then there may be no extract cost. Should we ask if
19620   //       inserting a scalar back into a vector is cheap instead?
19621   int Index0, Index1;
19622   SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
19623   SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
19624   if (!Src0 || !Src1 || Index0 != Index1 ||
19625       Src0.getValueType().getVectorElementType() != EltVT ||
19626       Src1.getValueType().getVectorElementType() != EltVT ||
19627       !TLI.isExtractVecEltCheap(VT, Index0) ||
19628       !TLI.isOperationLegalOrCustom(Opcode, EltVT))
19629     return SDValue();
19630
19631   SDLoc DL(N);
19632   SDValue IndexC =
19633       DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
19634   SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
19635   SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
19636   SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
19637
19638   // If all lanes but 1 are undefined, no need to splat the scalar result.
19639   // TODO: Keep track of undefs and use that info in the general case.
19640   if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
19641       count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
19642       count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
19643     // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
19644     // build_vec ..undef, (bo X, Y), undef...
19645     SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
19646     Ops[Index0] = ScalarBO;
19647     return DAG.getBuildVector(VT, DL, Ops);
19648   }
19649
19650   // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
19651   SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
19652   return DAG.getBuildVector(VT, DL, Ops);
19653 }
19654
19655 /// Visit a binary vector operation, like ADD.
19656 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
19657   assert(N->getValueType(0).isVector() &&
19658          "SimplifyVBinOp only works on vectors!");
19659
19660   SDValue LHS = N->getOperand(0);
19661   SDValue RHS = N->getOperand(1);
19662   SDValue Ops[] = {LHS, RHS};
19663   EVT VT = N->getValueType(0);
19664   unsigned Opcode = N->getOpcode();
19665
19666   // See if we can constant fold the vector operation.
19667   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
19668           Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
19669     return Fold;
19670
19671   // Move unary shuffles with identical masks after a vector binop:
19672   // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
19673   //   --> shuffle (VBinOp A, B), Undef, Mask
19674   // This does not require type legality checks because we are creating the
19675   // same types of operations that are in the original sequence. We do have to
19676   // restrict ops like integer div that have immediate UB (eg, div-by-zero)
19677   // though. This code is adapted from the identical transform in instcombine.
19678   if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
19679       Opcode != ISD::UREM && Opcode != ISD::SREM &&
19680       Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
19681     auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
19682     auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
19683     if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
19684         LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
19685         (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
19686       SDLoc DL(N);
19687       SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
19688                                      RHS.getOperand(0), N->getFlags());
19689       SDValue UndefV = LHS.getOperand(1);
19690       return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
19691     }
19692   }
19693
19694   // The following pattern is likely to emerge with vector reduction ops. Moving
19695   // the binary operation ahead of insertion may allow using a narrower vector
19696   // instruction that has better performance than the wide version of the op:
19697   // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
19698   if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
19699       RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
19700       LHS.getOperand(2) == RHS.getOperand(2) &&
19701       (LHS.hasOneUse() || RHS.hasOneUse())) {
19702     SDValue X = LHS.getOperand(1);
19703     SDValue Y = RHS.getOperand(1);
19704     SDValue Z = LHS.getOperand(2);
19705     EVT NarrowVT = X.getValueType();
19706     if (NarrowVT == Y.getValueType() &&
19707         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19708       // (binop undef, undef) may not return undef, so compute that result.
19709       SDLoc DL(N);
19710       SDValue VecC =
19711           DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
19712       SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
19713       return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
19714     }
19715   }
19716
19717   // Make sure all but the first op are undef or constant.
19718   auto ConcatWithConstantOrUndef = [](SDValue Concat) {
19719     return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
19720            std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
19721                      [](const SDValue &Op) {
19722                        return Op.isUndef() ||
19723                               ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
19724                      });
19725   };
19726
19727   // The following pattern is likely to emerge with vector reduction ops. Moving
19728   // the binary operation ahead of the concat may allow using a narrower vector
19729   // instruction that has better performance than the wide version of the op:
19730   // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
19731   //   concat (VBinOp X, Y), VecC
19732   if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
19733       (LHS.hasOneUse() || RHS.hasOneUse())) {
19734     EVT NarrowVT = LHS.getOperand(0).getValueType();
19735     if (NarrowVT == RHS.getOperand(0).getValueType() &&
19736         TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
19737       SDLoc DL(N);
19738       unsigned NumOperands = LHS.getNumOperands();
19739       SmallVector<SDValue, 4> ConcatOps;
19740       for (unsigned i = 0; i != NumOperands; ++i) {
19741         // This constant fold for operands 1 and up.
19742         ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
19743                                         RHS.getOperand(i)));
19744       }
19745
19746       return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
19747     }
19748   }
19749
19750   if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
19751     return V;
19752
19753   return SDValue();
19754 }
19755
19756 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
19757                                     SDValue N2) {
19758   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
19759
19760   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
19761                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
19762
19763   // If we got a simplified select_cc node back from SimplifySelectCC, then
19764   // break it down into a new SETCC node, and a new SELECT node, and then return
19765   // the SELECT node, since we were called with a SELECT node.
19766   if (SCC.getNode()) {
19767     // Check to see if we got a select_cc back (to turn into setcc/select).
19768     // Otherwise, just return whatever node we got back, like fabs.
19769     if (SCC.getOpcode() == ISD::SELECT_CC) {
19770       const SDNodeFlags Flags = N0.getNode()->getFlags();
19771       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
19772                                   N0.getValueType(),
19773                                   SCC.getOperand(0), SCC.getOperand(1),
19774                                   SCC.getOperand(4), Flags);
19775       AddToWorklist(SETCC.getNode());
19776       SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
19777                                          SCC.getOperand(2), SCC.getOperand(3));
19778       SelectNode->setFlags(Flags);
19779       return SelectNode;
19780     }
19781
19782     return SCC;
19783   }
19784   return SDValue();
19785 }
19786
19787 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
19788 /// being selected between, see if we can simplify the select.  Callers of this
19789 /// should assume that TheSelect is deleted if this returns true.  As such, they
19790 /// should return the appropriate thing (e.g. the node) back to the top-level of
19791 /// the DAG combiner loop to avoid it being looked at.
19792 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
19793                                     SDValue RHS) {
19794   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19795   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
19796   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
19797     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
19798       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
19799       SDValue Sqrt = RHS;
19800       ISD::CondCode CC;
19801       SDValue CmpLHS;
19802       const ConstantFPSDNode *Zero = nullptr;
19803
19804       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
19805         CC = cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
19806         CmpLHS = TheSelect->getOperand(0);
19807         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
19808       } else {
19809         // SELECT or VSELECT
19810         SDValue Cmp = TheSelect->getOperand(0);
19811         if (Cmp.getOpcode() == ISD::SETCC) {
19812           CC = cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
19813           CmpLHS = Cmp.getOperand(0);
19814           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
19815         }
19816       }
19817       if (Zero && Zero->isZero() &&
19818           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
19819           CC == ISD::SETULT || CC == ISD::SETLT)) {
19820         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
19821         CombineTo(TheSelect, Sqrt);
19822         return true;
19823       }
19824     }
19825   }
19826   // Cannot simplify select with vector condition
19827   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
19828
19829   // If this is a select from two identical things, try to pull the operation
19830   // through the select.
19831   if (LHS.getOpcode() != RHS.getOpcode() ||
19832       !LHS.hasOneUse() || !RHS.hasOneUse())
19833     return false;
19834
19835   // If this is a load and the token chain is identical, replace the select
19836   // of two loads with a load through a select of the address to load from.
19837   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
19838   // constants have been dropped into the constant pool.
19839   if (LHS.getOpcode() == ISD::LOAD) {
19840     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
19841     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
19842
19843     // Token chains must be identical.
19844     if (LHS.getOperand(0) != RHS.getOperand(0) ||
19845         // Do not let this transformation reduce the number of volatile loads.
19846         LLD->isVolatile() || RLD->isVolatile() ||
19847         // FIXME: If either is a pre/post inc/dec load,
19848         // we'd need to split out the address adjustment.
19849         LLD->isIndexed() || RLD->isIndexed() ||
19850         // If this is an EXTLOAD, the VT's must match.
19851         LLD->getMemoryVT() != RLD->getMemoryVT() ||
19852         // If this is an EXTLOAD, the kind of extension must match.
19853         (LLD->getExtensionType() != RLD->getExtensionType() &&
19854          // The only exception is if one of the extensions is anyext.
19855          LLD->getExtensionType() != ISD::EXTLOAD &&
19856          RLD->getExtensionType() != ISD::EXTLOAD) ||
19857         // FIXME: this discards src value information.  This is
19858         // over-conservative. It would be beneficial to be able to remember
19859         // both potential memory locations.  Since we are discarding
19860         // src value info, don't do the transformation if the memory
19861         // locations are not in the default address space.
19862         LLD->getPointerInfo().getAddrSpace() != 0 ||
19863         RLD->getPointerInfo().getAddrSpace() != 0 ||
19864         // We can't produce a CMOV of a TargetFrameIndex since we won't
19865         // generate the address generation required.
19866         LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19867         RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
19868         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
19869                                       LLD->getBasePtr().getValueType()))
19870       return false;
19871
19872     // The loads must not depend on one another.
19873     if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD))
19874       return false;
19875
19876     // Check that the select condition doesn't reach either load.  If so,
19877     // folding this will induce a cycle into the DAG.  If not, this is safe to
19878     // xform, so create a select of the addresses.
19879
19880     SmallPtrSet<const SDNode *, 32> Visited;
19881     SmallVector<const SDNode *, 16> Worklist;
19882
19883     // Always fail if LLD and RLD are not independent. TheSelect is a
19884     // predecessor to all Nodes in question so we need not search past it.
19885
19886     Visited.insert(TheSelect);
19887     Worklist.push_back(LLD);
19888     Worklist.push_back(RLD);
19889
19890     if (SDNode::hasPredecessorHelper(LLD, Visited, Worklist) ||
19891         SDNode::hasPredecessorHelper(RLD, Visited, Worklist))
19892       return false;
19893
19894     SDValue Addr;
19895     if (TheSelect->getOpcode() == ISD::SELECT) {
19896       // We cannot do this optimization if any pair of {RLD, LLD} is a
19897       // predecessor to {RLD, LLD, CondNode}. As we've already compared the
19898       // Loads, we only need to check if CondNode is a successor to one of the
19899       // loads. We can further avoid this if there's no use of their chain
19900       // value.
19901       SDNode *CondNode = TheSelect->getOperand(0).getNode();
19902       Worklist.push_back(CondNode);
19903
19904       if ((LLD->hasAnyUseOfValue(1) &&
19905            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19906           (RLD->hasAnyUseOfValue(1) &&
19907            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19908         return false;
19909
19910       Addr = DAG.getSelect(SDLoc(TheSelect),
19911                            LLD->getBasePtr().getValueType(),
19912                            TheSelect->getOperand(0), LLD->getBasePtr(),
19913                            RLD->getBasePtr());
19914     } else {  // Otherwise SELECT_CC
19915       // We cannot do this optimization if any pair of {RLD, LLD} is a
19916       // predecessor to {RLD, LLD, CondLHS, CondRHS}. As we've already compared
19917       // the Loads, we only need to check if CondLHS/CondRHS is a successor to
19918       // one of the loads. We can further avoid this if there's no use of their
19919       // chain value.
19920
19921       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
19922       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
19923       Worklist.push_back(CondLHS);
19924       Worklist.push_back(CondRHS);
19925
19926       if ((LLD->hasAnyUseOfValue(1) &&
19927            SDNode::hasPredecessorHelper(LLD, Visited, Worklist)) ||
19928           (RLD->hasAnyUseOfValue(1) &&
19929            SDNode::hasPredecessorHelper(RLD, Visited, Worklist)))
19930         return false;
19931
19932       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
19933                          LLD->getBasePtr().getValueType(),
19934                          TheSelect->getOperand(0),
19935                          TheSelect->getOperand(1),
19936                          LLD->getBasePtr(), RLD->getBasePtr(),
19937                          TheSelect->getOperand(4));
19938     }
19939
19940     SDValue Load;
19941     // It is safe to replace the two loads if they have different alignments,
19942     // but the new load must be the minimum (most restrictive) alignment of the
19943     // inputs.
19944     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
19945     MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
19946     if (!RLD->isInvariant())
19947       MMOFlags &= ~MachineMemOperand::MOInvariant;
19948     if (!RLD->isDereferenceable())
19949       MMOFlags &= ~MachineMemOperand::MODereferenceable;
19950     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
19951       // FIXME: Discards pointer and AA info.
19952       Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
19953                          LLD->getChain(), Addr, MachinePointerInfo(), Alignment,
19954                          MMOFlags);
19955     } else {
19956       // FIXME: Discards pointer and AA info.
19957       Load = DAG.getExtLoad(
19958           LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType()
19959                                                   : LLD->getExtensionType(),
19960           SDLoc(TheSelect), TheSelect->getValueType(0), LLD->getChain(), Addr,
19961           MachinePointerInfo(), LLD->getMemoryVT(), Alignment, MMOFlags);
19962     }
19963
19964     // Users of the select now use the result of the load.
19965     CombineTo(TheSelect, Load);
19966
19967     // Users of the old loads now use the new load's chain.  We know the
19968     // old-load value is dead now.
19969     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
19970     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
19971     return true;
19972   }
19973
19974   return false;
19975 }
19976
19977 /// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
19978 /// bitwise 'and'.
19979 SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
19980                                             SDValue N1, SDValue N2, SDValue N3,
19981                                             ISD::CondCode CC) {
19982   // If this is a select where the false operand is zero and the compare is a
19983   // check of the sign bit, see if we can perform the "gzip trick":
19984   // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
19985   // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
19986   EVT XType = N0.getValueType();
19987   EVT AType = N2.getValueType();
19988   if (!isNullConstant(N3) || !XType.bitsGE(AType))
19989     return SDValue();
19990
19991   // If the comparison is testing for a positive value, we have to invert
19992   // the sign bit mask, so only do that transform if the target has a bitwise
19993   // 'and not' instruction (the invert is free).
19994   if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
19995     // (X > -1) ? A : 0
19996     // (X >  0) ? X : 0 <-- This is canonical signed max.
19997     if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
19998       return SDValue();
19999   } else if (CC == ISD::SETLT) {
20000     // (X <  0) ? A : 0
20001     // (X <  1) ? X : 0 <-- This is un-canonicalized signed min.
20002     if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
20003       return SDValue();
20004   } else {
20005     return SDValue();
20006   }
20007
20008   // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
20009   // constant.
20010   EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
20011   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20012   if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
20013     unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
20014     SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
20015     SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
20016     AddToWorklist(Shift.getNode());
20017
20018     if (XType.bitsGT(AType)) {
20019       Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20020       AddToWorklist(Shift.getNode());
20021     }
20022
20023     if (CC == ISD::SETGT)
20024       Shift = DAG.getNOT(DL, Shift, AType);
20025
20026     return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20027   }
20028
20029   SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
20030   SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
20031   AddToWorklist(Shift.getNode());
20032
20033   if (XType.bitsGT(AType)) {
20034     Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
20035     AddToWorklist(Shift.getNode());
20036   }
20037
20038   if (CC == ISD::SETGT)
20039     Shift = DAG.getNOT(DL, Shift, AType);
20040
20041   return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
20042 }
20043
20044 /// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
20045 /// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
20046 /// in it. This may be a win when the constant is not otherwise available
20047 /// because it replaces two constant pool loads with one.
20048 SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
20049     const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
20050     ISD::CondCode CC) {
20051   if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
20052     return SDValue();
20053
20054   // If we are before legalize types, we want the other legalization to happen
20055   // first (for example, to avoid messing with soft float).
20056   auto *TV = dyn_cast<ConstantFPSDNode>(N2);
20057   auto *FV = dyn_cast<ConstantFPSDNode>(N3);
20058   EVT VT = N2.getValueType();
20059   if (!TV || !FV || !TLI.isTypeLegal(VT))
20060     return SDValue();
20061
20062   // If a constant can be materialized without loads, this does not make sense.
20063   if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
20064       TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
20065       TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
20066     return SDValue();
20067
20068   // If both constants have multiple uses, then we won't need to do an extra
20069   // load. The values are likely around in registers for other users.
20070   if (!TV->hasOneUse() && !FV->hasOneUse())
20071     return SDValue();
20072
20073   Constant *Elts[] = { const_cast<ConstantFP*>(FV->getConstantFPValue()),
20074                        const_cast<ConstantFP*>(TV->getConstantFPValue()) };
20075   Type *FPTy = Elts[0]->getType();
20076   const DataLayout &TD = DAG.getDataLayout();
20077
20078   // Create a ConstantArray of the two constants.
20079   Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
20080   SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
20081                                       TD.getPrefTypeAlignment(FPTy));
20082   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
20083
20084   // Get offsets to the 0 and 1 elements of the array, so we can select between
20085   // them.
20086   SDValue Zero = DAG.getIntPtrConstant(0, DL);
20087   unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
20088   SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
20089   SDValue Cond =
20090       DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC);
20091   AddToWorklist(Cond.getNode());
20092   SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), Cond, One, Zero);
20093   AddToWorklist(CstOffset.getNode());
20094   CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, CstOffset);
20095   AddToWorklist(CPIdx.getNode());
20096   return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
20097                      MachinePointerInfo::getConstantPool(
20098                          DAG.getMachineFunction()), Alignment);
20099 }
20100
20101 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
20102 /// where 'cond' is the comparison specified by CC.
20103 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
20104                                       SDValue N2, SDValue N3, ISD::CondCode CC,
20105                                       bool NotExtCompare) {
20106   // (x ? y : y) -> y.
20107   if (N2 == N3) return N2;
20108
20109   EVT CmpOpVT = N0.getValueType();
20110   EVT CmpResVT = getSetCCResultType(CmpOpVT);
20111   EVT VT = N2.getValueType();
20112   auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
20113   auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
20114   auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
20115
20116   // Determine if the condition we're dealing with is constant.
20117   if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
20118     AddToWorklist(SCC.getNode());
20119     if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
20120       // fold select_cc true, x, y -> x
20121       // fold select_cc false, x, y -> y
20122       return !(SCCC->isNullValue()) ? N2 : N3;
20123     }
20124   }
20125
20126   if (SDValue V =
20127           convertSelectOfFPConstantsToLoadOffset(DL, N0, N1, N2, N3, CC))
20128     return V;
20129
20130   if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
20131     return V;
20132
20133   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
20134   // where y is has a single bit set.
20135   // A plaintext description would be, we can turn the SELECT_CC into an AND
20136   // when the condition can be materialized as an all-ones register.  Any
20137   // single bit-test can be materialized as an all-ones register with
20138   // shift-left and shift-right-arith.
20139   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
20140       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
20141     SDValue AndLHS = N0->getOperand(0);
20142     auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
20143     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
20144       // Shift the tested bit over the sign bit.
20145       const APInt &AndMask = ConstAndRHS->getAPIntValue();
20146       SDValue ShlAmt =
20147         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
20148                         getShiftAmountTy(AndLHS.getValueType()));
20149       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
20150
20151       // Now arithmetic right shift it all the way over, so the result is either
20152       // all-ones, or zero.
20153       SDValue ShrAmt =
20154         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
20155                         getShiftAmountTy(Shl.getValueType()));
20156       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
20157
20158       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
20159     }
20160   }
20161
20162   // fold select C, 16, 0 -> shl C, 4
20163   bool Fold = N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2();
20164   bool Swap = N3C && isNullConstant(N2) && N3C->getAPIntValue().isPowerOf2();
20165
20166   if ((Fold || Swap) &&
20167       TLI.getBooleanContents(CmpOpVT) ==
20168           TargetLowering::ZeroOrOneBooleanContent &&
20169       (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, CmpOpVT))) {
20170
20171     if (Swap) {
20172       CC = ISD::getSetCCInverse(CC, CmpOpVT.isInteger());
20173       std::swap(N2C, N3C);
20174     }
20175
20176     // If the caller doesn't want us to simplify this into a zext of a compare,
20177     // don't do it.
20178     if (NotExtCompare && N2C->isOne())
20179       return SDValue();
20180
20181     SDValue Temp, SCC;
20182     // zext (setcc n0, n1)
20183     if (LegalTypes) {
20184       SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
20185       if (VT.bitsLT(SCC.getValueType()))
20186         Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
20187       else
20188         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20189     } else {
20190       SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
20191       Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), VT, SCC);
20192     }
20193
20194     AddToWorklist(SCC.getNode());
20195     AddToWorklist(Temp.getNode());
20196
20197     if (N2C->isOne())
20198       return Temp;
20199
20200     // shl setcc result by log2 n2c
20201     return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
20202                        DAG.getConstant(N2C->getAPIntValue().logBase2(),
20203                                        SDLoc(Temp),
20204                                        getShiftAmountTy(Temp.getValueType())));
20205   }
20206
20207   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
20208   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
20209   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
20210   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
20211   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
20212   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
20213   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
20214   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
20215   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
20216     SDValue ValueOnZero = N2;
20217     SDValue Count = N3;
20218     // If the condition is NE instead of E, swap the operands.
20219     if (CC == ISD::SETNE)
20220       std::swap(ValueOnZero, Count);
20221     // Check if the value on zero is a constant equal to the bits in the type.
20222     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
20223       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
20224         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
20225         // legal, combine to just cttz.
20226         if ((Count.getOpcode() == ISD::CTTZ ||
20227              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
20228             N0 == Count.getOperand(0) &&
20229             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
20230           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
20231         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
20232         // legal, combine to just ctlz.
20233         if ((Count.getOpcode() == ISD::CTLZ ||
20234              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
20235             N0 == Count.getOperand(0) &&
20236             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
20237           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
20238       }
20239     }
20240   }
20241
20242   return SDValue();
20243 }
20244
20245 /// This is a stub for TargetLowering::SimplifySetCC.
20246 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
20247                                    ISD::CondCode Cond, const SDLoc &DL,
20248                                    bool foldBooleans) {
20249   TargetLowering::DAGCombinerInfo
20250     DagCombineInfo(DAG, Level, false, this);
20251   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
20252 }
20253
20254 /// Given an ISD::SDIV node expressing a divide by constant, return
20255 /// a DAG expression to select that will generate the same value by multiplying
20256 /// by a magic number.
20257 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20258 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
20259   // when optimising for minimum size, we don't want to expand a div to a mul
20260   // and a shift.
20261   if (DAG.getMachineFunction().getFunction().hasMinSize())
20262     return SDValue();
20263
20264   SmallVector<SDNode *, 8> Built;
20265   if (SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, Built)) {
20266     for (SDNode *N : Built)
20267       AddToWorklist(N);
20268     return S;
20269   }
20270
20271   return SDValue();
20272 }
20273
20274 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
20275 /// DAG expression that will generate the same value by right shifting.
20276 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
20277   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
20278   if (!C)
20279     return SDValue();
20280
20281   // Avoid division by zero.
20282   if (C->isNullValue())
20283     return SDValue();
20284
20285   SmallVector<SDNode *, 8> Built;
20286   if (SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, Built)) {
20287     for (SDNode *N : Built)
20288       AddToWorklist(N);
20289     return S;
20290   }
20291
20292   return SDValue();
20293 }
20294
20295 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
20296 /// expression that will generate the same value by multiplying by a magic
20297 /// number.
20298 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
20299 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
20300   // when optimising for minimum size, we don't want to expand a div to a mul
20301   // and a shift.
20302   if (DAG.getMachineFunction().getFunction().hasMinSize())
20303     return SDValue();
20304
20305   SmallVector<SDNode *, 8> Built;
20306   if (SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, Built)) {
20307     for (SDNode *N : Built)
20308       AddToWorklist(N);
20309     return S;
20310   }
20311
20312   return SDValue();
20313 }
20314
20315 /// Determines the LogBase2 value for a non-null input value using the
20316 /// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
20317 SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
20318   EVT VT = V.getValueType();
20319   unsigned EltBits = VT.getScalarSizeInBits();
20320   SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
20321   SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
20322   SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
20323   return LogBase2;
20324 }
20325
20326 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20327 /// For the reciprocal, we need to find the zero of the function:
20328 ///   F(X) = A X - 1 [which has a zero at X = 1/A]
20329 ///     =>
20330 ///   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
20331 ///     does not require additional intermediate precision]
20332 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
20333   if (Level >= AfterLegalizeDAG)
20334     return SDValue();
20335
20336   // TODO: Handle half and/or extended types?
20337   EVT VT = Op.getValueType();
20338   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20339     return SDValue();
20340
20341   // If estimates are explicitly disabled for this function, we're done.
20342   MachineFunction &MF = DAG.getMachineFunction();
20343   int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
20344   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20345     return SDValue();
20346
20347   // Estimates may be explicitly enabled for this type with a custom number of
20348   // refinement steps.
20349   int Iterations = TLI.getDivRefinementSteps(VT, MF);
20350   if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
20351     AddToWorklist(Est.getNode());
20352
20353     if (Iterations) {
20354       SDLoc DL(Op);
20355       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
20356
20357       // Newton iterations: Est = Est + Est (1 - Arg * Est)
20358       for (int i = 0; i < Iterations; ++i) {
20359         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
20360         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
20361         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20362         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
20363       }
20364     }
20365     return Est;
20366   }
20367
20368   return SDValue();
20369 }
20370
20371 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20372 /// For the reciprocal sqrt, we need to find the zero of the function:
20373 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20374 ///     =>
20375 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
20376 /// As a result, we precompute A/2 prior to the iteration loop.
20377 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
20378                                          unsigned Iterations,
20379                                          SDNodeFlags Flags, bool Reciprocal) {
20380   EVT VT = Arg.getValueType();
20381   SDLoc DL(Arg);
20382   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
20383
20384   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
20385   // this entire sequence requires only one FP constant.
20386   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
20387   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
20388
20389   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
20390   for (unsigned i = 0; i < Iterations; ++i) {
20391     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
20392     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
20393     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
20394     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
20395   }
20396
20397   // If non-reciprocal square root is requested, multiply the result by Arg.
20398   if (!Reciprocal)
20399     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
20400
20401   return Est;
20402 }
20403
20404 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
20405 /// For the reciprocal sqrt, we need to find the zero of the function:
20406 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
20407 ///     =>
20408 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
20409 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
20410                                          unsigned Iterations,
20411                                          SDNodeFlags Flags, bool Reciprocal) {
20412   EVT VT = Arg.getValueType();
20413   SDLoc DL(Arg);
20414   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
20415   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
20416
20417   // This routine must enter the loop below to work correctly
20418   // when (Reciprocal == false).
20419   assert(Iterations > 0);
20420
20421   // Newton iterations for reciprocal square root:
20422   // E = (E * -0.5) * ((A * E) * E + -3.0)
20423   for (unsigned i = 0; i < Iterations; ++i) {
20424     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
20425     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
20426     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
20427
20428     // When calculating a square root at the last iteration build:
20429     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
20430     // (notice a common subexpression)
20431     SDValue LHS;
20432     if (Reciprocal || (i + 1) < Iterations) {
20433       // RSQRT: LHS = (E * -0.5)
20434       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
20435     } else {
20436       // SQRT: LHS = (A * E) * -0.5
20437       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
20438     }
20439
20440     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
20441   }
20442
20443   return Est;
20444 }
20445
20446 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
20447 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
20448 /// Op can be zero.
20449 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
20450                                            bool Reciprocal) {
20451   if (Level >= AfterLegalizeDAG)
20452     return SDValue();
20453
20454   // TODO: Handle half and/or extended types?
20455   EVT VT = Op.getValueType();
20456   if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
20457     return SDValue();
20458
20459   // If estimates are explicitly disabled for this function, we're done.
20460   MachineFunction &MF = DAG.getMachineFunction();
20461   int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
20462   if (Enabled == TLI.ReciprocalEstimate::Disabled)
20463     return SDValue();
20464
20465   // Estimates may be explicitly enabled for this type with a custom number of
20466   // refinement steps.
20467   int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
20468
20469   bool UseOneConstNR = false;
20470   if (SDValue Est =
20471       TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
20472                           Reciprocal)) {
20473     AddToWorklist(Est.getNode());
20474
20475     if (Iterations) {
20476       Est = UseOneConstNR
20477             ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
20478             : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
20479
20480       if (!Reciprocal) {
20481         // The estimate is now completely wrong if the input was exactly 0.0 or
20482         // possibly a denormal. Force the answer to 0.0 for those cases.
20483         SDLoc DL(Op);
20484         EVT CCVT = getSetCCResultType(VT);
20485         ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
20486         const Function &F = DAG.getMachineFunction().getFunction();
20487         Attribute Denorms = F.getFnAttribute("denormal-fp-math");
20488         if (Denorms.getValueAsString().equals("ieee")) {
20489           // fabs(X) < SmallestNormal ? 0.0 : Est
20490           const fltSemantics &FltSem = DAG.EVTToAPFloatSemantics(VT);
20491           APFloat SmallestNorm = APFloat::getSmallestNormalized(FltSem);
20492           SDValue NormC = DAG.getConstantFP(SmallestNorm, DL, VT);
20493           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20494           SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
20495           SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
20496           Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
20497         } else {
20498           // X == 0.0 ? 0.0 : Est
20499           SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
20500           SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
20501           Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
20502         }
20503       }
20504     }
20505     return Est;
20506   }
20507
20508   return SDValue();
20509 }
20510
20511 SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20512   return buildSqrtEstimateImpl(Op, Flags, true);
20513 }
20514
20515 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
20516   return buildSqrtEstimateImpl(Op, Flags, false);
20517 }
20518
20519 /// Return true if there is any possibility that the two addresses overlap.
20520 bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
20521
20522   struct MemUseCharacteristics {
20523     bool IsVolatile;
20524     SDValue BasePtr;
20525     int64_t Offset;
20526     Optional<int64_t> NumBytes;
20527     MachineMemOperand *MMO;
20528   };
20529
20530   auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
20531     if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
20532       int64_t Offset = 0;
20533       if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
20534         Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
20535                      ? C->getSExtValue()
20536                      : (LSN->getAddressingMode() == ISD::PRE_DEC)
20537                            ? -1 * C->getSExtValue()
20538                            : 0;
20539       return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
20540               Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
20541               LSN->getMemOperand()};
20542     }
20543     if (const auto *LN = cast<LifetimeSDNode>(N))
20544       return {false /*isVolatile*/, LN->getOperand(1),
20545               (LN->hasOffset()) ? LN->getOffset() : 0,
20546               (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
20547                                 : Optional<int64_t>(),
20548               (MachineMemOperand *)nullptr};
20549     // Default.
20550     return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
20551             Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
20552   };
20553
20554   MemUseCharacteristics MUC0 = getCharacteristics(Op0),
20555                         MUC1 = getCharacteristics(Op1);
20556
20557   // If they are to the same address, then they must be aliases.
20558   if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
20559       MUC0.Offset == MUC1.Offset)
20560     return true;
20561
20562   // If they are both volatile then they cannot be reordered.
20563   if (MUC0.IsVolatile && MUC1.IsVolatile)
20564     return true;
20565
20566   if (MUC0.MMO && MUC1.MMO) {
20567     if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20568         (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20569       return false;
20570   }
20571
20572   // Try to prove that there is aliasing, or that there is no aliasing. Either
20573   // way, we can return now. If nothing can be proved, proceed with more tests.
20574   bool IsAlias;
20575   if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
20576                                        DAG, IsAlias))
20577     return IsAlias;
20578
20579   // The following all rely on MMO0 and MMO1 being valid. Fail conservatively if
20580   // either are not known.
20581   if (!MUC0.MMO || !MUC1.MMO)
20582     return true;
20583
20584   // If one operation reads from invariant memory, and the other may store, they
20585   // cannot alias. These should really be checking the equivalent of mayWrite,
20586   // but it only matters for memory nodes other than load /store.
20587   if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
20588       (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
20589     return false;
20590
20591   // If we know required SrcValue1 and SrcValue2 have relatively large
20592   // alignment compared to the size and offset of the access, we may be able
20593   // to prove they do not alias. This check is conservative for now to catch
20594   // cases created by splitting vector types.
20595   int64_t SrcValOffset0 = MUC0.MMO->getOffset();
20596   int64_t SrcValOffset1 = MUC1.MMO->getOffset();
20597   unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
20598   unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
20599   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
20600       MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
20601       *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
20602     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
20603     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
20604
20605     // There is no overlap between these relatively aligned accesses of
20606     // similar size. Return no alias.
20607     if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
20608         (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
20609       return false;
20610   }
20611
20612   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
20613                    ? CombinerGlobalAA
20614                    : DAG.getSubtarget().useAA();
20615 #ifndef NDEBUG
20616   if (CombinerAAOnlyFunc.getNumOccurrences() &&
20617       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
20618     UseAA = false;
20619 #endif
20620
20621   if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
20622     // Use alias analysis information.
20623     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
20624     int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
20625     int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
20626     AliasResult AAResult = AA->alias(
20627         MemoryLocation(MUC0.MMO->getValue(), Overlap0,
20628                        UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
20629         MemoryLocation(MUC1.MMO->getValue(), Overlap1,
20630                        UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
20631     if (AAResult == NoAlias)
20632       return false;
20633   }
20634
20635   // Otherwise we have to assume they alias.
20636   return true;
20637 }
20638
20639 /// Walk up chain skipping non-aliasing memory nodes,
20640 /// looking for aliasing nodes and adding them to the Aliases vector.
20641 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
20642                                    SmallVectorImpl<SDValue> &Aliases) {
20643   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
20644   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
20645
20646   // Get alias information for node.
20647   const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
20648
20649   // Starting off.
20650   Chains.push_back(OriginalChain);
20651   unsigned Depth = 0;
20652
20653   // Attempt to improve chain by a single step
20654   std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
20655     switch (C.getOpcode()) {
20656     case ISD::EntryToken:
20657       // No need to mark EntryToken.
20658       C = SDValue();
20659       return true;
20660     case ISD::LOAD:
20661     case ISD::STORE: {
20662       // Get alias information for C.
20663       bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
20664                       !cast<LSBaseSDNode>(C.getNode())->isVolatile();
20665       if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
20666         // Look further up the chain.
20667         C = C.getOperand(0);
20668         return true;
20669       }
20670       // Alias, so stop here.
20671       return false;
20672     }
20673
20674     case ISD::CopyFromReg:
20675       // Always forward past past CopyFromReg.
20676       C = C.getOperand(0);
20677       return true;
20678
20679     case ISD::LIFETIME_START:
20680     case ISD::LIFETIME_END: {
20681       // We can forward past any lifetime start/end that can be proven not to
20682       // alias the memory access.
20683       if (!isAlias(N, C.getNode())) {
20684         // Look further up the chain.
20685         C = C.getOperand(0);
20686         return true;
20687       }
20688       return false;
20689     }
20690     default:
20691       return false;
20692     }
20693   };
20694
20695   // Look at each chain and determine if it is an alias.  If so, add it to the
20696   // aliases list.  If not, then continue up the chain looking for the next
20697   // candidate.
20698   while (!Chains.empty()) {
20699     SDValue Chain = Chains.pop_back_val();
20700
20701     // Don't bother if we've seen Chain before.
20702     if (!Visited.insert(Chain.getNode()).second)
20703       continue;
20704
20705     // For TokenFactor nodes, look at each operand and only continue up the
20706     // chain until we reach the depth limit.
20707     //
20708     // FIXME: The depth check could be made to return the last non-aliasing
20709     // chain we found before we hit a tokenfactor rather than the original
20710     // chain.
20711     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
20712       Aliases.clear();
20713       Aliases.push_back(OriginalChain);
20714       return;
20715     }
20716
20717     if (Chain.getOpcode() == ISD::TokenFactor) {
20718       // We have to check each of the operands of the token factor for "small"
20719       // token factors, so we queue them up.  Adding the operands to the queue
20720       // (stack) in reverse order maintains the original order and increases the
20721       // likelihood that getNode will find a matching token factor (CSE.)
20722       if (Chain.getNumOperands() > 16) {
20723         Aliases.push_back(Chain);
20724         continue;
20725       }
20726       for (unsigned n = Chain.getNumOperands(); n;)
20727         Chains.push_back(Chain.getOperand(--n));
20728       ++Depth;
20729       continue;
20730     }
20731     // Everything else
20732     if (ImproveChain(Chain)) {
20733       // Updated Chain Found, Consider new chain if one exists.
20734       if (Chain.getNode())
20735         Chains.push_back(Chain);
20736       ++Depth;
20737       continue;
20738     }
20739     // No Improved Chain Possible, treat as Alias.
20740     Aliases.push_back(Chain);
20741   }
20742 }
20743
20744 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
20745 /// (aliasing node.)
20746 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
20747   if (OptLevel == CodeGenOpt::None)
20748     return OldChain;
20749
20750   // Ops for replacing token factor.
20751   SmallVector<SDValue, 8> Aliases;
20752
20753   // Accumulate all the aliases to this node.
20754   GatherAllAliases(N, OldChain, Aliases);
20755
20756   // If no operands then chain to entry token.
20757   if (Aliases.size() == 0)
20758     return DAG.getEntryNode();
20759
20760   // If a single operand then chain to it.  We don't need to revisit it.
20761   if (Aliases.size() == 1)
20762     return Aliases[0];
20763
20764   // Construct a custom tailored token factor.
20765   return DAG.getTokenFactor(SDLoc(N), Aliases);
20766 }
20767
namespace {
// TODO: Replace with std::monostate when we move to C++17.
/// Empty value type for maps that only care about their keys; every UnitT
/// compares equal to every other UnitT.
struct UnitT {};
UnitT Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &LHS, const UnitT &RHS) { return !(LHS == RHS); }
} // namespace
20774
20775 // This function tries to collect a bunch of potentially interesting
20776 // nodes to improve the chains of, all at once. This might seem
20777 // redundant, as this function gets called when visiting every store
20778 // node, so why not let the work be done on each store as it's visited?
20779 //
20780 // I believe this is mainly important because MergeConsecutiveStores
20781 // is unable to deal with merging stores of different sizes, so unless
20782 // we improve the chains of all the potential candidates up-front
20783 // before running MergeConsecutiveStores, it might only see some of
20784 // the nodes that will eventually be candidates, and then not be able
20785 // to go from a partially-merged state to the desired final
20786 // fully-merged state.
20787
20788 bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
20789   SmallVector<StoreSDNode *, 8> ChainedStores;
20790   StoreSDNode *STChain = St;
20791   // Intervals records which offsets from BaseIndex have been covered. In
20792   // the common case, every store writes to the immediately previous address
20793   // space and thus merged with the previous interval at insertion time.
20794
20795   using IMap =
20796       llvm::IntervalMap<int64_t, UnitT, 8, IntervalMapHalfOpenInfo<int64_t>>;
20797   IMap::Allocator A;
20798   IMap Intervals(A);
20799
20800   // This holds the base pointer, index, and the offset in bytes from the base
20801   // pointer.
20802   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20803
20804   // We must have a base and an offset.
20805   if (!BasePtr.getBase().getNode())
20806     return false;
20807
20808   // Do not handle stores to undef base pointers.
20809   if (BasePtr.getBase().isUndef())
20810     return false;
20811
20812   // Add ST's interval.
20813   Intervals.insert(0, (St->getMemoryVT().getSizeInBits() + 7) / 8, Unit);
20814
20815   while (StoreSDNode *Chain = dyn_cast<StoreSDNode>(STChain->getChain())) {
20816     // If the chain has more than one use, then we can't reorder the mem ops.
20817     if (!SDValue(Chain, 0)->hasOneUse())
20818       break;
20819     if (Chain->isVolatile() || Chain->isIndexed())
20820       break;
20821
20822     // Find the base pointer and offset for this memory node.
20823     const BaseIndexOffset Ptr = BaseIndexOffset::match(Chain, DAG);
20824     // Check that the base pointer is the same as the original one.
20825     int64_t Offset;
20826     if (!BasePtr.equalBaseIndex(Ptr, DAG, Offset))
20827       break;
20828     int64_t Length = (Chain->getMemoryVT().getSizeInBits() + 7) / 8;
20829     // Make sure we don't overlap with other intervals by checking the ones to
20830     // the left or right before inserting.
20831     auto I = Intervals.find(Offset);
20832     // If there's a next interval, we should end before it.
20833     if (I != Intervals.end() && I.start() < (Offset + Length))
20834       break;
20835     // If there's a previous interval, we should start after it.
20836     if (I != Intervals.begin() && (--I).stop() <= Offset)
20837       break;
20838     Intervals.insert(Offset, Offset + Length, Unit);
20839
20840     ChainedStores.push_back(Chain);
20841     STChain = Chain;
20842   }
20843
20844   // If we didn't find a chained store, exit.
20845   if (ChainedStores.size() == 0)
20846     return false;
20847
20848   // Improve all chained stores (St and ChainedStores members) starting from
20849   // where the store chain ended and return single TokenFactor.
20850   SDValue NewChain = STChain->getChain();
20851   SmallVector<SDValue, 8> TFOps;
20852   for (unsigned I = ChainedStores.size(); I;) {
20853     StoreSDNode *S = ChainedStores[--I];
20854     SDValue BetterChain = FindBetterChain(S, NewChain);
20855     S = cast<StoreSDNode>(DAG.UpdateNodeOperands(
20856         S, BetterChain, S->getOperand(1), S->getOperand(2), S->getOperand(3)));
20857     TFOps.push_back(SDValue(S, 0));
20858     ChainedStores[I] = S;
20859   }
20860
20861   // Improve St's chain. Use a new node to avoid creating a loop from CombineTo.
20862   SDValue BetterChain = FindBetterChain(St, NewChain);
20863   SDValue NewST;
20864   if (St->isTruncatingStore())
20865     NewST = DAG.getTruncStore(BetterChain, SDLoc(St), St->getValue(),
20866                               St->getBasePtr(), St->getMemoryVT(),
20867                               St->getMemOperand());
20868   else
20869     NewST = DAG.getStore(BetterChain, SDLoc(St), St->getValue(),
20870                          St->getBasePtr(), St->getMemOperand());
20871
20872   TFOps.push_back(NewST);
20873
20874   // If we improved every element of TFOps, then we've lost the dependence on
20875   // NewChain to successors of St and we need to add it back to TFOps. Do so at
20876   // the beginning to keep relative order consistent with FindBetterChains.
20877   auto hasImprovedChain = [&](SDValue ST) -> bool {
20878     return ST->getOperand(0) != NewChain;
20879   };
20880   bool AddNewChain = llvm::all_of(TFOps, hasImprovedChain);
20881   if (AddNewChain)
20882     TFOps.insert(TFOps.begin(), NewChain);
20883
20884   SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
20885   CombineTo(St, TF);
20886
20887   // Add TF and its operands to the worklist.
20888   AddToWorklist(TF.getNode());
20889   for (const SDValue &Op : TF->ops())
20890     AddToWorklist(Op.getNode());
20891   AddToWorklist(STChain);
20892   return true;
20893 }
20894
20895 bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
20896   if (OptLevel == CodeGenOpt::None)
20897     return false;
20898
20899   const BaseIndexOffset BasePtr = BaseIndexOffset::match(St, DAG);
20900
20901   // We must have a base and an offset.
20902   if (!BasePtr.getBase().getNode())
20903     return false;
20904
20905   // Do not handle stores to undef base pointers.
20906   if (BasePtr.getBase().isUndef())
20907     return false;
20908
20909   // Directly improve a chain of disjoint stores starting at St.
20910   if (parallelizeChainedStores(St))
20911     return true;
20912
20913   // Improve St's Chain..
20914   SDValue BetterChain = FindBetterChain(St, St->getChain());
20915   if (St->getChain() != BetterChain) {
20916     replaceStoreChain(St, BetterChain);
20917     return true;
20918   }
20919   return false;
20920 }
20921
20922 /// This is the entry point for the file.
20923 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis *AA,
20924                            CodeGenOpt::Level OptLevel) {
20925   /// This is the main entry point to this class.
20926   DAGCombiner(*this, AA, OptLevel).Run(Level);
20927 }