llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp

   1 //===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the SelectionDAG::LegalizeVectors method.
  10 //
  11 // The vector legalizer looks for vector operations which might need to be
  12 // scalarized and legalizes them. This is a separate step from Legalize because
  13 // scalarizing can introduce illegal types.  For example, suppose we have an
  14 // ISD::SDIV of type v2i64 on x86-32.  The type is legal (for example, addition
  15 // on a v2i64 is legal), but ISD::SDIV isn't legal, so we have to unroll the
  16 // operation, which introduces nodes with the illegal type i64 which must be
  17 // expanded.  Similarly, suppose we have an ISD::SRA of type v16i8 on PowerPC;
  18 // the operation must be unrolled, which introduces nodes with the illegal
  19 // type i8 which must be promoted.
  20 //
  21 // This does not legalize vector manipulations like ISD::BUILD_VECTOR,
  22 // or operations that happen to take a vector which are custom-lowered;
  23 // the legalization for such operations never produces nodes
  24 // with illegal types, so it's okay to put off legalizing them until
  25 // SelectionDAG::Legalize runs.
  26 //
  27 //===----------------------------------------------------------------------===//
  28
  29 #include "llvm/ADT/DenseMap.h"
  30 #include "llvm/ADT/SmallVector.h"
  31 #include "llvm/Analysis/TargetLibraryInfo.h"
  32 #include "llvm/Analysis/VectorUtils.h"
  33 #include "llvm/CodeGen/ISDOpcodes.h"
  34 #include "llvm/CodeGen/SelectionDAG.h"
  35 #include "llvm/CodeGen/SelectionDAGNodes.h"
  36 #include "llvm/CodeGen/TargetLowering.h"
  37 #include "llvm/CodeGen/ValueTypes.h"
  38 #include "llvm/CodeGenTypes/MachineValueType.h"
  39 #include "llvm/IR/DataLayout.h"
  40 #include "llvm/Support/Casting.h"
  41 #include "llvm/Support/Compiler.h"
  42 #include "llvm/Support/Debug.h"
  43 #include "llvm/Support/ErrorHandling.h"
  44 #include <cassert>
  45 #include <cstdint>
  46 #include <iterator>
  47 #include <utility>
  48
  49 using namespace llvm;
  50
  51 #define DEBUG_TYPE "legalizevectorops"
  52
  53 namespace {
  54
  55 class VectorLegalizer {
  56   SelectionDAG& DAG;
  57   const TargetLowering &TLI;
  58   bool Changed = false; // Keep track of whether anything changed
  59
  60   /// For nodes that are of legal width, and that have more than one use, this
  61   /// map indicates what regularized operand to use.  This allows us to avoid
  62   /// legalizing the same thing more than once.
  63   SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes;
  64
  65   /// Adds a node to the translation cache.
  66   void AddLegalizedOperand(SDValue From, SDValue To) {
  67     LegalizedNodes.insert(std::make_pair(From, To));
  68     // If someone requests legalization of the new node, return itself.
  69     if (From != To)
  70       LegalizedNodes.insert(std::make_pair(To, To));
  71   }
  72
  73   /// Legalizes the given node.
  74   SDValue LegalizeOp(SDValue Op);
  75
  76   /// Assuming the node is legal, "legalize" the results.
  77   SDValue TranslateLegalizeResults(SDValue Op, SDNode *Result);
  78
  79   /// Make sure Results are legal and update the translation cache.
  80   SDValue RecursivelyLegalizeResults(SDValue Op,
  81                                      MutableArrayRef<SDValue> Results);
  82
  83   /// Wrapper to interface LowerOperation with a vector of Results.
  84   /// Returns false if the target wants to use default expansion. Otherwise
  85   /// returns true. If return is true and the Results are empty, then the
  86   /// target wants to keep the input node as is.
  87   bool LowerOperationWrapper(SDNode *N, SmallVectorImpl<SDValue> &Results);
  88
  89   /// Implements unrolling a VSETCC.
  90   SDValue UnrollVSETCC(SDNode *Node);
  91
  92   /// Implement expand-based legalization of vector operations.
  93   ///
  94   /// This is just a high-level routine to dispatch to specific code paths for
  95   /// operations to legalize them.
  96   void Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results);
  97
  98   /// Implements expansion for FP_TO_UINT; falls back to UnrollVectorOp if
  99   /// FP_TO_SINT isn't legal.
 100   void ExpandFP_TO_UINT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 101
 102   /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if
 103   /// SINT_TO_FLOAT and SHR on vectors isn't legal.
 104   void ExpandUINT_TO_FLOAT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 105
 106   /// Implement expansion for SIGN_EXTEND_INREG using SRL and SRA.
 107   SDValue ExpandSEXTINREG(SDNode *Node);
 108
 109   /// Implement expansion for ANY_EXTEND_VECTOR_INREG.
 110   ///
 111   /// Shuffles the low lanes of the operand into place and bitcasts to the proper
 112   /// type. The contents of the bits in the extended part of each element are
 113   /// undef.
 114   SDValue ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node);
 115
 116   /// Implement expansion for SIGN_EXTEND_VECTOR_INREG.
 117   ///
 118   /// Shuffles the low lanes of the operand into place, bitcasts to the proper
 119   /// type, then shifts left and arithmetic shifts right to introduce a sign
 120   /// extension.
 121   SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node);
 122
 123   /// Implement expansion for ZERO_EXTEND_VECTOR_INREG.
 124   ///
 125   /// Shuffles the low lanes of the operand into place and blends zeros into
 126   /// the remaining lanes, finally bitcasting to the proper type.
 127   SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node);
 128
 129   /// Expand bswap of vectors into a shuffle if legal.
 130   SDValue ExpandBSWAP(SDNode *Node);
 131
 132   /// Implement vselect in terms of XOR, AND, OR when blend is not
 133   /// supported by the target.
 134   SDValue ExpandVSELECT(SDNode *Node);
 135   SDValue ExpandVP_SELECT(SDNode *Node);
 136   SDValue ExpandVP_MERGE(SDNode *Node);
 137   SDValue ExpandVP_REM(SDNode *Node);
 138   SDValue ExpandVP_FNEG(SDNode *Node);
 139   SDValue ExpandVP_FABS(SDNode *Node);
 140   SDValue ExpandVP_FCOPYSIGN(SDNode *Node);
 141   SDValue ExpandSELECT(SDNode *Node);
 142   std::pair<SDValue, SDValue> ExpandLoad(SDNode *N);
 143   SDValue ExpandStore(SDNode *N);
 144   SDValue ExpandFNEG(SDNode *Node);
 145   SDValue ExpandFABS(SDNode *Node);
 146   SDValue ExpandFCOPYSIGN(SDNode *Node);
 147   void ExpandFSUB(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 148   void ExpandSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 149   SDValue ExpandBITREVERSE(SDNode *Node);
 150   void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 151   void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 152   void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 153   void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 154   void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 155   void ExpandREM(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 156
 157   bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
 158                             SmallVectorImpl<SDValue> &Results);
 159   bool tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall Call_F32,
 160                             RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
 161                             RTLIB::Libcall Call_F128,
 162                             RTLIB::Libcall Call_PPCF128,
 163                             SmallVectorImpl<SDValue> &Results);
 164
 165   void UnrollStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 166
 167   /// Implements vector promotion.
 168   ///
 169   /// This is essentially just bitcasting the operands to a different type and
 170   /// bitcasting the result back to the original type.
 171   void Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 172
 173   /// Implements [SU]INT_TO_FP vector promotion.
 174   ///
 175   /// This is a [zs]ext of the input operand to a larger integer type.
 176   void PromoteINT_TO_FP(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 177
 178   /// Implements FP_TO_[SU]INT vector promotion of the result type.
 179   ///
 180   /// It is promoted to a larger integer type.  The result is then
 181   /// truncated back to the original type.
 182   void PromoteFP_TO_INT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 183
 184   /// Implements vector setcc operation promotion.
 185   ///
 186   /// All vector operands are promoted to a vector type with larger element
 187   /// type.
 188   void PromoteSETCC(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 189
 190   void PromoteSTRICT(SDNode *Node, SmallVectorImpl<SDValue> &Results);
 191
 192 public:
 193   VectorLegalizer(SelectionDAG& dag) :
 194       DAG(dag), TLI(dag.getTargetLoweringInfo()) {}
 195
 196   /// Begin legalizer the vector operations in the DAG.
 197   bool Run();
 198 };
 199
 200 } // end anonymous namespace
 201
 202 bool VectorLegalizer::Run() {
 203   // Before we start legalizing vector nodes, check if there are any vectors.
 204   bool HasVectors = false;
 205   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 206        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I) {
 207     // Check if the values of the nodes contain vectors. We don't need to check
 208     // the operands because we are going to check their values at some point.
 209     HasVectors = llvm::any_of(I->values(), [](EVT T) { return T.isVector(); });
 210
 211     // If we found a vector node we can start the legalization.
 212     if (HasVectors)
 213       break;
 214   }
 215
 216   // If this basic block has no vectors then no need to legalize vectors.
 217   if (!HasVectors)
 218     return false;
 219
 220   // The legalize process is inherently a bottom-up recursive process (users
 221   // legalize their uses before themselves).  Given infinite stack space, we
 222   // could just start legalizing on the root and traverse the whole graph.  In
 223   // practice however, this causes us to run out of stack space on large basic
 224   // blocks.  To avoid this problem, compute an ordering of the nodes where each
 225   // node is only legalized after all of its operands are legalized.
 226   DAG.AssignTopologicalOrder();
 227   for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
 228        E = std::prev(DAG.allnodes_end()); I != std::next(E); ++I)
 229     LegalizeOp(SDValue(&*I, 0));
 230
 231   // Finally, it's possible the root changed.  Get the new root.
 232   SDValue OldRoot = DAG.getRoot();
 233   assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?");
 234   DAG.setRoot(LegalizedNodes[OldRoot]);
 235
 236   LegalizedNodes.clear();
 237
 238   // Remove dead nodes now.
 239   DAG.RemoveDeadNodes();
 240
 241   return Changed;
 242 }
 243
 244 SDValue VectorLegalizer::TranslateLegalizeResults(SDValue Op, SDNode *Result) {
 245   assert(Op->getNumValues() == Result->getNumValues() &&
 246          "Unexpected number of results");
 247   // Generic legalization: just pass the operand through.
 248   for (unsigned i = 0, e = Op->getNumValues(); i != e; ++i)
 249     AddLegalizedOperand(Op.getValue(i), SDValue(Result, i));
 250   return SDValue(Result, Op.getResNo());
 251 }
 252
 253 SDValue
 254 VectorLegalizer::RecursivelyLegalizeResults(SDValue Op,
 255                                             MutableArrayRef<SDValue> Results) {
 256   assert(Results.size() == Op->getNumValues() &&
 257          "Unexpected number of results");
 258   // Make sure that the generated code is itself legal.
 259   for (unsigned i = 0, e = Results.size(); i != e; ++i) {
 260     Results[i] = LegalizeOp(Results[i]);
 261     AddLegalizedOperand(Op.getValue(i), Results[i]);
 262   }
 263
 264   return Results[Op.getResNo()];
 265 }
 266
 267 SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
 268   // Note that LegalizeOp may be reentered even from single-use nodes, which
 269   // means that we always must cache transformed nodes.
 270   DenseMap<SDValue, SDValue>::iterator I = LegalizedNodes.find(Op);
 271   if (I != LegalizedNodes.end()) return I->second;
 272
 273   // Legalize the operands
 274   SmallVector<SDValue, 8> Ops;
 275   for (const SDValue &Oper : Op->op_values())
 276     Ops.push_back(LegalizeOp(Oper));
 277
 278   SDNode *Node = DAG.UpdateNodeOperands(Op.getNode(), Ops);
 279
 280   bool HasVectorValueOrOp =
 281       llvm::any_of(Node->values(), [](EVT T) { return T.isVector(); }) ||
 282       llvm::any_of(Node->op_values(),
 283                    [](SDValue O) { return O.getValueType().isVector(); });
 284   if (!HasVectorValueOrOp)
 285     return TranslateLegalizeResults(Op, Node);
 286
 287   TargetLowering::LegalizeAction Action = TargetLowering::Legal;
 288   EVT ValVT;
 289   switch (Op.getOpcode()) {
 290   default:
 291     return TranslateLegalizeResults(Op, Node);
 292   case ISD::LOAD: {
 293     LoadSDNode *LD = cast<LoadSDNode>(Node);
 294     ISD::LoadExtType ExtType = LD->getExtensionType();
 295     EVT LoadedVT = LD->getMemoryVT();
 296     if (LoadedVT.isVector() && ExtType != ISD::NON_EXTLOAD)
 297       Action = TLI.getLoadExtAction(ExtType, LD->getValueType(0), LoadedVT);
 298     break;
 299   }
 300   case ISD::STORE: {
 301     StoreSDNode *ST = cast<StoreSDNode>(Node);
 302     EVT StVT = ST->getMemoryVT();
 303     MVT ValVT = ST->getValue().getSimpleValueType();
 304     if (StVT.isVector() && ST->isTruncatingStore())
 305       Action = TLI.getTruncStoreAction(ValVT, StVT);
 306     break;
 307   }
 308   case ISD::MERGE_VALUES:
 309     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
 310     // This operation lies about being legal: when it claims to be legal,
 311     // it should actually be expanded.
 312     if (Action == TargetLowering::Legal)
 313       Action = TargetLowering::Expand;
 314     break;
 315 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
 316   case ISD::STRICT_##DAGN:
 317 #include "llvm/IR/ConstrainedOps.def"
 318     ValVT = Node->getValueType(0);
 319     if (Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
 320         Op.getOpcode() == ISD::STRICT_UINT_TO_FP)
 321       ValVT = Node->getOperand(1).getValueType();
 322     if (Op.getOpcode() == ISD::STRICT_FSETCC ||
 323         Op.getOpcode() == ISD::STRICT_FSETCCS) {
 324       MVT OpVT = Node->getOperand(1).getSimpleValueType();
 325       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(3))->get();
 326       Action = TLI.getCondCodeAction(CCCode, OpVT);
 327       if (Action == TargetLowering::Legal)
 328         Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
 329     } else {
 330       Action = TLI.getOperationAction(Node->getOpcode(), ValVT);
 331     }
 332     // If we're asked to expand a strict vector floating-point operation,
 333     // by default we're going to simply unroll it.  That is usually the
 334     // best approach, except in the case where the resulting strict (scalar)
 335     // operations would themselves use the fallback mutation to non-strict.
 336     // In that specific case, just do the fallback on the vector op.
 337     if (Action == TargetLowering::Expand && !TLI.isStrictFPEnabled() &&
 338         TLI.getStrictFPOperationAction(Node->getOpcode(), ValVT) ==
 339             TargetLowering::Legal) {
 340       EVT EltVT = ValVT.getVectorElementType();
 341       if (TLI.getOperationAction(Node->getOpcode(), EltVT)
 342           == TargetLowering::Expand &&
 343           TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
 344           == TargetLowering::Legal)
 345         Action = TargetLowering::Legal;
 346     }
 347     break;
 348   case ISD::ADD:
 349   case ISD::SUB:
 350   case ISD::MUL:
 351   case ISD::MULHS:
 352   case ISD::MULHU:
 353   case ISD::SDIV:
 354   case ISD::UDIV:
 355   case ISD::SREM:
 356   case ISD::UREM:
 357   case ISD::SDIVREM:
 358   case ISD::UDIVREM:
 359   case ISD::FADD:
 360   case ISD::FSUB:
 361   case ISD::FMUL:
 362   case ISD::FDIV:
 363   case ISD::FREM:
 364   case ISD::AND:
 365   case ISD::OR:
 366   case ISD::XOR:
 367   case ISD::SHL:
 368   case ISD::SRA:
 369   case ISD::SRL:
 370   case ISD::FSHL:
 371   case ISD::FSHR:
 372   case ISD::ROTL:
 373   case ISD::ROTR:
 374   case ISD::ABS:
 375   case ISD::ABDS:
 376   case ISD::ABDU:
 377   case ISD::AVGCEILS:
 378   case ISD::AVGCEILU:
 379   case ISD::AVGFLOORS:
 380   case ISD::AVGFLOORU:
 381   case ISD::BSWAP:
 382   case ISD::BITREVERSE:
 383   case ISD::CTLZ:
 384   case ISD::CTTZ:
 385   case ISD::CTLZ_ZERO_UNDEF:
 386   case ISD::CTTZ_ZERO_UNDEF:
 387   case ISD::CTPOP:
 388   case ISD::SELECT:
 389   case ISD::VSELECT:
 390   case ISD::SELECT_CC:
 391   case ISD::ZERO_EXTEND:
 392   case ISD::ANY_EXTEND:
 393   case ISD::TRUNCATE:
 394   case ISD::SIGN_EXTEND:
 395   case ISD::FP_TO_SINT:
 396   case ISD::FP_TO_UINT:
 397   case ISD::FNEG:
 398   case ISD::FABS:
 399   case ISD::FMINNUM:
 400   case ISD::FMAXNUM:
 401   case ISD::FMINNUM_IEEE:
 402   case ISD::FMAXNUM_IEEE:
 403   case ISD::FMINIMUM:
 404   case ISD::FMAXIMUM:
 405   case ISD::FMINIMUMNUM:
 406   case ISD::FMAXIMUMNUM:
 407   case ISD::FCOPYSIGN:
 408   case ISD::FSQRT:
 409   case ISD::FSIN:
 410   case ISD::FCOS:
 411   case ISD::FTAN:
 412   case ISD::FASIN:
 413   case ISD::FACOS:
 414   case ISD::FATAN:
 415   case ISD::FATAN2:
 416   case ISD::FSINH:
 417   case ISD::FCOSH:
 418   case ISD::FTANH:
 419   case ISD::FLDEXP:
 420   case ISD::FPOWI:
 421   case ISD::FPOW:
 422   case ISD::FLOG:
 423   case ISD::FLOG2:
 424   case ISD::FLOG10:
 425   case ISD::FEXP:
 426   case ISD::FEXP2:
 427   case ISD::FEXP10:
 428   case ISD::FCEIL:
 429   case ISD::FTRUNC:
 430   case ISD::FRINT:
 431   case ISD::FNEARBYINT:
 432   case ISD::FROUND:
 433   case ISD::FROUNDEVEN:
 434   case ISD::FFLOOR:
 435   case ISD::FP_ROUND:
 436   case ISD::FP_EXTEND:
 437   case ISD::FPTRUNC_ROUND:
 438   case ISD::FMA:
 439   case ISD::SIGN_EXTEND_INREG:
 440   case ISD::ANY_EXTEND_VECTOR_INREG:
 441   case ISD::SIGN_EXTEND_VECTOR_INREG:
 442   case ISD::ZERO_EXTEND_VECTOR_INREG:
 443   case ISD::SMIN:
 444   case ISD::SMAX:
 445   case ISD::UMIN:
 446   case ISD::UMAX:
 447   case ISD::SMUL_LOHI:
 448   case ISD::UMUL_LOHI:
 449   case ISD::SADDO:
 450   case ISD::UADDO:
 451   case ISD::SSUBO:
 452   case ISD::USUBO:
 453   case ISD::SMULO:
 454   case ISD::UMULO:
 455   case ISD::FCANONICALIZE:
 456   case ISD::FFREXP:
 457   case ISD::FSINCOS:
 458   case ISD::SADDSAT:
 459   case ISD::UADDSAT:
 460   case ISD::SSUBSAT:
 461   case ISD::USUBSAT:
 462   case ISD::SSHLSAT:
 463   case ISD::USHLSAT:
 464   case ISD::FP_TO_SINT_SAT:
 465   case ISD::FP_TO_UINT_SAT:
 466   case ISD::MGATHER:
 467   case ISD::VECTOR_COMPRESS:
 468   case ISD::SCMP:
 469   case ISD::UCMP:
 470     Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
 471     break;
 472   case ISD::SMULFIX:
 473   case ISD::SMULFIXSAT:
 474   case ISD::UMULFIX:
 475   case ISD::UMULFIXSAT:
 476   case ISD::SDIVFIX:
 477   case ISD::SDIVFIXSAT:
 478   case ISD::UDIVFIX:
 479   case ISD::UDIVFIXSAT: {
 480     unsigned Scale = Node->getConstantOperandVal(2);
 481     Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
 482                                               Node->getValueType(0), Scale);
 483     break;
 484   }
 485   case ISD::LROUND:
 486   case ISD::LLROUND:
 487   case ISD::LRINT:
 488   case ISD::LLRINT:
 489   case ISD::SINT_TO_FP:
 490   case ISD::UINT_TO_FP:
 491   case ISD::VECREDUCE_ADD:
 492   case ISD::VECREDUCE_MUL:
 493   case ISD::VECREDUCE_AND:
 494   case ISD::VECREDUCE_OR:
 495   case ISD::VECREDUCE_XOR:
 496   case ISD::VECREDUCE_SMAX:
 497   case ISD::VECREDUCE_SMIN:
 498   case ISD::VECREDUCE_UMAX:
 499   case ISD::VECREDUCE_UMIN:
 500   case ISD::VECREDUCE_FADD:
 501   case ISD::VECREDUCE_FMUL:
 502   case ISD::VECREDUCE_FMAX:
 503   case ISD::VECREDUCE_FMIN:
 504   case ISD::VECREDUCE_FMAXIMUM:
 505   case ISD::VECREDUCE_FMINIMUM:
 506     Action = TLI.getOperationAction(Node->getOpcode(),
 507                                     Node->getOperand(0).getValueType());
 508     break;
 509   case ISD::VECREDUCE_SEQ_FADD:
 510   case ISD::VECREDUCE_SEQ_FMUL:
 511     Action = TLI.getOperationAction(Node->getOpcode(),
 512                                     Node->getOperand(1).getValueType());
 513     break;
 514   case ISD::SETCC: {
 515     MVT OpVT = Node->getOperand(0).getSimpleValueType();
 516     ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get();
 517     Action = TLI.getCondCodeAction(CCCode, OpVT);
 518     if (Action == TargetLowering::Legal)
 519       Action = TLI.getOperationAction(Node->getOpcode(), OpVT);
 520     break;
 521   }
 522
 523 #define BEGIN_REGISTER_VP_SDNODE(VPID, LEGALPOS, ...)                          \
 524   case ISD::VPID: {                                                            \
 525     EVT LegalizeVT = LEGALPOS < 0 ? Node->getValueType(-(1 + LEGALPOS))        \
 526                                   : Node->getOperand(LEGALPOS).getValueType(); \
 527     if (ISD::VPID == ISD::VP_SETCC) {                                          \
 528       ISD::CondCode CCCode = cast<CondCodeSDNode>(Node->getOperand(2))->get(); \
 529       Action = TLI.getCondCodeAction(CCCode, LegalizeVT.getSimpleVT());        \
 530       if (Action != TargetLowering::Legal)                                     \
 531         break;                                                                 \
 532     }                                                                          \
 533     /* Defer non-vector results to LegalizeDAG. */                             \
 534     if (!Node->getValueType(0).isVector() &&                                   \
 535         Node->getValueType(0) != MVT::Other) {                                 \
 536       Action = TargetLowering::Legal;                                          \
 537       break;                                                                   \
 538     }                                                                          \
 539     Action = TLI.getOperationAction(Node->getOpcode(), LegalizeVT);            \
 540   } break;
 541 #include "llvm/IR/VPIntrinsics.def"
 542   }
 543
 544   LLVM_DEBUG(dbgs() << "\nLegalizing vector op: "; Node->dump(&DAG));
 545
 546   SmallVector<SDValue, 8> ResultVals;
 547   switch (Action) {
 548   default: llvm_unreachable("This action is not supported yet!");
 549   case TargetLowering::Promote:
 550     assert((Op.getOpcode() != ISD::LOAD && Op.getOpcode() != ISD::STORE) &&
 551            "This action is not supported yet!");
 552     LLVM_DEBUG(dbgs() << "Promoting\n");
 553     Promote(Node, ResultVals);
 554     assert(!ResultVals.empty() && "No results for promotion?");
 555     break;
 556   case TargetLowering::Legal:
 557     LLVM_DEBUG(dbgs() << "Legal node: nothing to do\n");
 558     break;
 559   case TargetLowering::Custom:
 560     LLVM_DEBUG(dbgs() << "Trying custom legalization\n");
 561     if (LowerOperationWrapper(Node, ResultVals))
 562       break;
 563     LLVM_DEBUG(dbgs() << "Could not custom legalize node\n");
 564     [[fallthrough]];
 565   case TargetLowering::Expand:
 566     LLVM_DEBUG(dbgs() << "Expanding\n");
 567     Expand(Node, ResultVals);
 568     break;
 569   }
 570
 571   if (ResultVals.empty())
 572     return TranslateLegalizeResults(Op, Node);
 573
 574   Changed = true;
 575   return RecursivelyLegalizeResults(Op, ResultVals);
 576 }
 577
 578 // FIXME: This is very similar to TargetLowering::LowerOperationWrapper. Can we
 579 // merge them somehow?
 580 bool VectorLegalizer::LowerOperationWrapper(SDNode *Node,
 581                                             SmallVectorImpl<SDValue> &Results) {
 582   SDValue Res = TLI.LowerOperation(SDValue(Node, 0), DAG);
 583
 584   if (!Res.getNode())
 585     return false;
 586
 587   if (Res == SDValue(Node, 0))
 588     return true;
 589
 590   // If the original node has one result, take the return value from
 591   // LowerOperation as is. It might not be result number 0.
 592   if (Node->getNumValues() == 1) {
 593     Results.push_back(Res);
 594     return true;
 595   }
 596
 597   // If the original node has multiple results, then the return node should
 598   // have the same number of results.
 599   assert((Node->getNumValues() == Res->getNumValues()) &&
 600          "Lowering returned the wrong number of results!");
 601
 602   // Places new result values base on N result number.
 603   for (unsigned I = 0, E = Node->getNumValues(); I != E; ++I)
 604     Results.push_back(Res.getValue(I));
 605
 606   return true;
 607 }
 608
 609 void VectorLegalizer::PromoteSETCC(SDNode *Node,
 610                                    SmallVectorImpl<SDValue> &Results) {
 611   MVT VecVT = Node->getOperand(0).getSimpleValueType();
 612   MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
 613
 614   unsigned ExtOp = VecVT.isFloatingPoint() ? ISD::FP_EXTEND : ISD::ANY_EXTEND;
 615
 616   SDLoc DL(Node);
 617   SmallVector<SDValue, 5> Operands(Node->getNumOperands());
 618
 619   Operands[0] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(0));
 620   Operands[1] = DAG.getNode(ExtOp, DL, NewVecVT, Node->getOperand(1));
 621   Operands[2] = Node->getOperand(2);
 622
 623   if (Node->getOpcode() == ISD::VP_SETCC) {
 624     Operands[3] = Node->getOperand(3); // mask
 625     Operands[4] = Node->getOperand(4); // evl
 626   }
 627
 628   SDValue Res = DAG.getNode(Node->getOpcode(), DL, Node->getSimpleValueType(0),
 629                             Operands, Node->getFlags());
 630
 631   Results.push_back(Res);
 632 }
 633
 634 void VectorLegalizer::PromoteSTRICT(SDNode *Node,
 635                                     SmallVectorImpl<SDValue> &Results) {
 636   MVT VecVT = Node->getOperand(1).getSimpleValueType();
 637   MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
 638
 639   assert(VecVT.isFloatingPoint());
 640
 641   SDLoc DL(Node);
 642   SmallVector<SDValue, 5> Operands(Node->getNumOperands());
 643   SmallVector<SDValue, 2> Chains;
 644
 645   for (unsigned j = 1; j != Node->getNumOperands(); ++j)
 646     if (Node->getOperand(j).getValueType().isVector() &&
 647         !(ISD::isVPOpcode(Node->getOpcode()) &&
 648           ISD::getVPMaskIdx(Node->getOpcode()) == j)) // Skip mask operand.
 649     {
 650       // promote the vector operand.
 651       SDValue Ext =
 652           DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVecVT, MVT::Other},
 653                       {Node->getOperand(0), Node->getOperand(j)});
 654       Operands[j] = Ext.getValue(0);
 655       Chains.push_back(Ext.getValue(1));
 656     } else
 657       Operands[j] = Node->getOperand(j); // Skip no vector operand.
 658
 659   SDVTList VTs = DAG.getVTList(NewVecVT, Node->getValueType(1));
 660
 661   Operands[0] = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
 662
 663   SDValue Res =
 664       DAG.getNode(Node->getOpcode(), DL, VTs, Operands, Node->getFlags());
 665
 666   SDValue Round =
 667       DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VecVT, MVT::Other},
 668                   {Res.getValue(1), Res.getValue(0),
 669                    DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
 670
 671   Results.push_back(Round.getValue(0));
 672   Results.push_back(Round.getValue(1));
 673 }
 674
 675 void VectorLegalizer::Promote(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
 676   // For a few operations there is a specific concept for promotion based on
 677   // the operand's type.
 678   switch (Node->getOpcode()) {
 679   case ISD::SINT_TO_FP:
 680   case ISD::UINT_TO_FP:
 681   case ISD::STRICT_SINT_TO_FP:
 682   case ISD::STRICT_UINT_TO_FP:
 683     // "Promote" the operation by extending the operand.
 684     PromoteINT_TO_FP(Node, Results);
 685     return;
 686   case ISD::FP_TO_UINT:
 687   case ISD::FP_TO_SINT:
 688   case ISD::STRICT_FP_TO_UINT:
 689   case ISD::STRICT_FP_TO_SINT:
 690     // Promote the operation by extending the operand.
 691     PromoteFP_TO_INT(Node, Results);
 692     return;
 693   case ISD::VP_SETCC:
 694   case ISD::SETCC:
 695     // Promote the operation by extending the operand.
 696     PromoteSETCC(Node, Results);
 697     return;
 698   case ISD::STRICT_FADD:
 699   case ISD::STRICT_FSUB:
 700   case ISD::STRICT_FMUL:
 701   case ISD::STRICT_FDIV:
 702   case ISD::STRICT_FSQRT:
 703   case ISD::STRICT_FMA:
 704     PromoteSTRICT(Node, Results);
 705     return;
 706   case ISD::FP_ROUND:
 707   case ISD::FP_EXTEND:
 708     // These operations are used to do promotion so they can't be promoted
 709     // themselves.
 710     llvm_unreachable("Don't know how to promote this operation!");
 711   case ISD::VP_FABS:
 712   case ISD::VP_FCOPYSIGN:
 713   case ISD::VP_FNEG:
 714     // Promoting fabs, fneg, and fcopysign changes their semantics.
 715     llvm_unreachable("These operations should not be promoted");
 716   }
 717
 718   // There are currently two cases of vector promotion:
 719   // 1) Bitcasting a vector of integers to a different type to a vector of the
 720   //    same overall length. For example, x86 promotes ISD::AND v2i32 to v1i64.
 721   // 2) Extending a vector of floats to a vector of the same number of larger
 722   //    floats. For example, AArch64 promotes ISD::FADD on v4f16 to v4f32.
 723   assert(Node->getNumValues() == 1 &&
 724          "Can't promote a vector with multiple results!");
 725   MVT VT = Node->getSimpleValueType(0);
 726   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
 727   SDLoc dl(Node);
 728   SmallVector<SDValue, 4> Operands(Node->getNumOperands());
 729
 730   for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
 731     // Do not promote the mask operand of a VP OP.
 732     bool SkipPromote = ISD::isVPOpcode(Node->getOpcode()) &&
 733                        ISD::getVPMaskIdx(Node->getOpcode()) == j;
 734     if (Node->getOperand(j).getValueType().isVector() && !SkipPromote)
 735       if (Node->getOperand(j)
 736               .getValueType()
 737               .getVectorElementType()
 738               .isFloatingPoint() &&
 739           NVT.isVector() && NVT.getVectorElementType().isFloatingPoint())
 740         Operands[j] = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(j));
 741       else
 742         Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(j));
 743     else
 744       Operands[j] = Node->getOperand(j);
 745   }
 746
 747   SDValue Res =
 748       DAG.getNode(Node->getOpcode(), dl, NVT, Operands, Node->getFlags());
 749
 750   if ((VT.isFloatingPoint() && NVT.isFloatingPoint()) ||
 751       (VT.isVector() && VT.getVectorElementType().isFloatingPoint() &&
 752        NVT.isVector() && NVT.getVectorElementType().isFloatingPoint()))
 753     Res = DAG.getNode(ISD::FP_ROUND, dl, VT, Res,
 754                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
 755   else
 756     Res = DAG.getNode(ISD::BITCAST, dl, VT, Res);
 757
 758   Results.push_back(Res);
 759 }
 760
 761 void VectorLegalizer::PromoteINT_TO_FP(SDNode *Node,
 762                                        SmallVectorImpl<SDValue> &Results) {
 763   // INT_TO_FP operations may require the input operand be promoted even
 764   // when the type is otherwise legal.
 765   bool IsStrict = Node->isStrictFPOpcode();
 766   MVT VT = Node->getOperand(IsStrict ? 1 : 0).getSimpleValueType();
 767   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
 768   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
 769          "Vectors have different number of elements!");
 770
 771   SDLoc dl(Node);
 772   SmallVector<SDValue, 4> Operands(Node->getNumOperands());
 773
 774   unsigned Opc = (Node->getOpcode() == ISD::UINT_TO_FP ||
 775                   Node->getOpcode() == ISD::STRICT_UINT_TO_FP)
 776                      ? ISD::ZERO_EXTEND
 777                      : ISD::SIGN_EXTEND;
 778   for (unsigned j = 0; j != Node->getNumOperands(); ++j) {
 779     if (Node->getOperand(j).getValueType().isVector())
 780       Operands[j] = DAG.getNode(Opc, dl, NVT, Node->getOperand(j));
 781     else
 782       Operands[j] = Node->getOperand(j);
 783   }
 784
 785   if (IsStrict) {
 786     SDValue Res = DAG.getNode(Node->getOpcode(), dl,
 787                               {Node->getValueType(0), MVT::Other}, Operands);
 788     Results.push_back(Res);
 789     Results.push_back(Res.getValue(1));
 790     return;
 791   }
 792
 793   SDValue Res =
 794       DAG.getNode(Node->getOpcode(), dl, Node->getValueType(0), Operands);
 795   Results.push_back(Res);
 796 }
 797
 798 // For FP_TO_INT we promote the result type to a vector type with wider
 799 // elements and then truncate the result.  This is different from the default
 800 // PromoteVector which uses bitcast to promote thus assumning that the
 801 // promoted vector type has the same overall size.
 802 void VectorLegalizer::PromoteFP_TO_INT(SDNode *Node,
 803                                        SmallVectorImpl<SDValue> &Results) {
 804   MVT VT = Node->getSimpleValueType(0);
 805   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
 806   bool IsStrict = Node->isStrictFPOpcode();
 807   assert(NVT.getVectorNumElements() == VT.getVectorNumElements() &&
 808          "Vectors have different number of elements!");
 809
 810   unsigned NewOpc = Node->getOpcode();
 811   // Change FP_TO_UINT to FP_TO_SINT if possible.
 812   // TODO: Should we only do this if FP_TO_UINT itself isn't legal?
 813   if (NewOpc == ISD::FP_TO_UINT &&
 814       TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
 815     NewOpc = ISD::FP_TO_SINT;
 816
 817   if (NewOpc == ISD::STRICT_FP_TO_UINT &&
 818       TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
 819     NewOpc = ISD::STRICT_FP_TO_SINT;
 820
 821   SDLoc dl(Node);
 822   SDValue Promoted, Chain;
 823   if (IsStrict) {
 824     Promoted = DAG.getNode(NewOpc, dl, {NVT, MVT::Other},
 825                            {Node->getOperand(0), Node->getOperand(1)});
 826     Chain = Promoted.getValue(1);
 827   } else
 828     Promoted = DAG.getNode(NewOpc, dl, NVT, Node->getOperand(0));
 829
 830   // Assert that the converted value fits in the original type.  If it doesn't
 831   // (eg: because the value being converted is too big), then the result of the
 832   // original operation was undefined anyway, so the assert is still correct.
 833   if (Node->getOpcode() == ISD::FP_TO_UINT ||
 834       Node->getOpcode() == ISD::STRICT_FP_TO_UINT)
 835     NewOpc = ISD::AssertZext;
 836   else
 837     NewOpc = ISD::AssertSext;
 838
 839   Promoted = DAG.getNode(NewOpc, dl, NVT, Promoted,
 840                          DAG.getValueType(VT.getScalarType()));
 841   Promoted = DAG.getNode(ISD::TRUNCATE, dl, VT, Promoted);
 842   Results.push_back(Promoted);
 843   if (IsStrict)
 844     Results.push_back(Chain);
 845 }
 846
 847 std::pair<SDValue, SDValue> VectorLegalizer::ExpandLoad(SDNode *N) {
 848   LoadSDNode *LD = cast<LoadSDNode>(N);
 849   return TLI.scalarizeVectorLoad(LD, DAG);
 850 }
 851
 852 SDValue VectorLegalizer::ExpandStore(SDNode *N) {
 853   StoreSDNode *ST = cast<StoreSDNode>(N);
 854   SDValue TF = TLI.scalarizeVectorStore(ST, DAG);
 855   return TF;
 856 }
 857
 858 void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
 859   switch (Node->getOpcode()) {
 860   case ISD::LOAD: {
 861     std::pair<SDValue, SDValue> Tmp = ExpandLoad(Node);
 862     Results.push_back(Tmp.first);
 863     Results.push_back(Tmp.second);
 864     return;
 865   }
 866   case ISD::STORE:
 867     Results.push_back(ExpandStore(Node));
 868     return;
 869   case ISD::MERGE_VALUES:
 870     for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
 871       Results.push_back(Node->getOperand(i));
 872     return;
 873   case ISD::SIGN_EXTEND_INREG:
 874     if (SDValue Expanded = ExpandSEXTINREG(Node)) {
 875       Results.push_back(Expanded);
 876       return;
 877     }
 878     break;
 879   case ISD::ANY_EXTEND_VECTOR_INREG:
 880     Results.push_back(ExpandANY_EXTEND_VECTOR_INREG(Node));
 881     return;
 882   case ISD::SIGN_EXTEND_VECTOR_INREG:
 883     Results.push_back(ExpandSIGN_EXTEND_VECTOR_INREG(Node));
 884     return;
 885   case ISD::ZERO_EXTEND_VECTOR_INREG:
 886     Results.push_back(ExpandZERO_EXTEND_VECTOR_INREG(Node));
 887     return;
 888   case ISD::BSWAP:
 889     if (SDValue Expanded = ExpandBSWAP(Node)) {
 890       Results.push_back(Expanded);
 891       return;
 892     }
 893     break;
 894   case ISD::VP_BSWAP:
 895     Results.push_back(TLI.expandVPBSWAP(Node, DAG));
 896     return;
 897   case ISD::VSELECT:
 898     if (SDValue Expanded = ExpandVSELECT(Node)) {
 899       Results.push_back(Expanded);
 900       return;
 901     }
 902     break;
 903   case ISD::VP_SELECT:
 904     if (SDValue Expanded = ExpandVP_SELECT(Node)) {
 905       Results.push_back(Expanded);
 906       return;
 907     }
 908     break;
 909   case ISD::VP_SREM:
 910   case ISD::VP_UREM:
 911     if (SDValue Expanded = ExpandVP_REM(Node)) {
 912       Results.push_back(Expanded);
 913       return;
 914     }
 915     break;
 916   case ISD::VP_FNEG:
 917     if (SDValue Expanded = ExpandVP_FNEG(Node)) {
 918       Results.push_back(Expanded);
 919       return;
 920     }
 921     break;
 922   case ISD::VP_FABS:
 923     if (SDValue Expanded = ExpandVP_FABS(Node)) {
 924       Results.push_back(Expanded);
 925       return;
 926     }
 927     break;
 928   case ISD::VP_FCOPYSIGN:
 929     if (SDValue Expanded = ExpandVP_FCOPYSIGN(Node)) {
 930       Results.push_back(Expanded);
 931       return;
 932     }
 933     break;
 934   case ISD::SELECT:
 935     if (SDValue Expanded = ExpandSELECT(Node)) {
 936       Results.push_back(Expanded);
 937       return;
 938     }
 939     break;
 940   case ISD::SELECT_CC: {
 941     if (Node->getValueType(0).isScalableVector()) {
 942       EVT CondVT = TLI.getSetCCResultType(
 943           DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
 944       SDValue SetCC =
 945           DAG.getNode(ISD::SETCC, SDLoc(Node), CondVT, Node->getOperand(0),
 946                       Node->getOperand(1), Node->getOperand(4));
 947       Results.push_back(DAG.getSelect(SDLoc(Node), Node->getValueType(0), SetCC,
 948                                       Node->getOperand(2),
 949                                       Node->getOperand(3)));
 950       return;
 951     }
 952     break;
 953   }
 954   case ISD::FP_TO_UINT:
 955     ExpandFP_TO_UINT(Node, Results);
 956     return;
 957   case ISD::UINT_TO_FP:
 958     ExpandUINT_TO_FLOAT(Node, Results);
 959     return;
 960   case ISD::FNEG:
 961     if (SDValue Expanded = ExpandFNEG(Node)) {
 962       Results.push_back(Expanded);
 963       return;
 964     }
 965     break;
 966   case ISD::FABS:
 967     if (SDValue Expanded = ExpandFABS(Node)) {
 968       Results.push_back(Expanded);
 969       return;
 970     }
 971     break;
 972   case ISD::FCOPYSIGN:
 973     if (SDValue Expanded = ExpandFCOPYSIGN(Node)) {
 974       Results.push_back(Expanded);
 975       return;
 976     }
 977     break;
 978   case ISD::FSUB:
 979     ExpandFSUB(Node, Results);
 980     return;
 981   case ISD::SETCC:
 982   case ISD::VP_SETCC:
 983     ExpandSETCC(Node, Results);
 984     return;
 985   case ISD::ABS:
 986     if (SDValue Expanded = TLI.expandABS(Node, DAG)) {
 987       Results.push_back(Expanded);
 988       return;
 989     }
 990     break;
 991   case ISD::ABDS:
 992   case ISD::ABDU:
 993     if (SDValue Expanded = TLI.expandABD(Node, DAG)) {
 994       Results.push_back(Expanded);
 995       return;
 996     }
 997     break;
 998   case ISD::AVGCEILS:
 999   case ISD::AVGCEILU:
1000   case ISD::AVGFLOORS:
1001   case ISD::AVGFLOORU:
1002     if (SDValue Expanded = TLI.expandAVG(Node, DAG)) {
1003       Results.push_back(Expanded);
1004       return;
1005     }
1006     break;
1007   case ISD::BITREVERSE:
1008     if (SDValue Expanded = ExpandBITREVERSE(Node)) {
1009       Results.push_back(Expanded);
1010       return;
1011     }
1012     break;
1013   case ISD::VP_BITREVERSE:
1014     if (SDValue Expanded = TLI.expandVPBITREVERSE(Node, DAG)) {
1015       Results.push_back(Expanded);
1016       return;
1017     }
1018     break;
1019   case ISD::CTPOP:
1020     if (SDValue Expanded = TLI.expandCTPOP(Node, DAG)) {
1021       Results.push_back(Expanded);
1022       return;
1023     }
1024     break;
1025   case ISD::VP_CTPOP:
1026     if (SDValue Expanded = TLI.expandVPCTPOP(Node, DAG)) {
1027       Results.push_back(Expanded);
1028       return;
1029     }
1030     break;
1031   case ISD::CTLZ:
1032   case ISD::CTLZ_ZERO_UNDEF:
1033     if (SDValue Expanded = TLI.expandCTLZ(Node, DAG)) {
1034       Results.push_back(Expanded);
1035       return;
1036     }
1037     break;
1038   case ISD::VP_CTLZ:
1039   case ISD::VP_CTLZ_ZERO_UNDEF:
1040     if (SDValue Expanded = TLI.expandVPCTLZ(Node, DAG)) {
1041       Results.push_back(Expanded);
1042       return;
1043     }
1044     break;
1045   case ISD::CTTZ:
1046   case ISD::CTTZ_ZERO_UNDEF:
1047     if (SDValue Expanded = TLI.expandCTTZ(Node, DAG)) {
1048       Results.push_back(Expanded);
1049       return;
1050     }
1051     break;
1052   case ISD::VP_CTTZ:
1053   case ISD::VP_CTTZ_ZERO_UNDEF:
1054     if (SDValue Expanded = TLI.expandVPCTTZ(Node, DAG)) {
1055       Results.push_back(Expanded);
1056       return;
1057     }
1058     break;
1059   case ISD::FSHL:
1060   case ISD::VP_FSHL:
1061   case ISD::FSHR:
1062   case ISD::VP_FSHR:
1063     if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG)) {
1064       Results.push_back(Expanded);
1065       return;
1066     }
1067     break;
1068   case ISD::ROTL:
1069   case ISD::ROTR:
1070     if (SDValue Expanded = TLI.expandROT(Node, false /*AllowVectorOps*/, DAG)) {
1071       Results.push_back(Expanded);
1072       return;
1073     }
1074     break;
1075   case ISD::FMINNUM:
1076   case ISD::FMAXNUM:
1077     if (SDValue Expanded = TLI.expandFMINNUM_FMAXNUM(Node, DAG)) {
1078       Results.push_back(Expanded);
1079       return;
1080     }
1081     break;
1082   case ISD::FMINIMUM:
1083   case ISD::FMAXIMUM:
1084     Results.push_back(TLI.expandFMINIMUM_FMAXIMUM(Node, DAG));
1085     return;
1086   case ISD::FMINIMUMNUM:
1087   case ISD::FMAXIMUMNUM:
1088     Results.push_back(TLI.expandFMINIMUMNUM_FMAXIMUMNUM(Node, DAG));
1089     return;
1090   case ISD::SMIN:
1091   case ISD::SMAX:
1092   case ISD::UMIN:
1093   case ISD::UMAX:
1094     if (SDValue Expanded = TLI.expandIntMINMAX(Node, DAG)) {
1095       Results.push_back(Expanded);
1096       return;
1097     }
1098     break;
1099   case ISD::UADDO:
1100   case ISD::USUBO:
1101     ExpandUADDSUBO(Node, Results);
1102     return;
1103   case ISD::SADDO:
1104   case ISD::SSUBO:
1105     ExpandSADDSUBO(Node, Results);
1106     return;
1107   case ISD::UMULO:
1108   case ISD::SMULO:
1109     ExpandMULO(Node, Results);
1110     return;
1111   case ISD::USUBSAT:
1112   case ISD::SSUBSAT:
1113   case ISD::UADDSAT:
1114   case ISD::SADDSAT:
1115     if (SDValue Expanded = TLI.expandAddSubSat(Node, DAG)) {
1116       Results.push_back(Expanded);
1117       return;
1118     }
1119     break;
1120   case ISD::USHLSAT:
1121   case ISD::SSHLSAT:
1122     if (SDValue Expanded = TLI.expandShlSat(Node, DAG)) {
1123       Results.push_back(Expanded);
1124       return;
1125     }
1126     break;
1127   case ISD::FP_TO_SINT_SAT:
1128   case ISD::FP_TO_UINT_SAT:
1129     // Expand the fpsosisat if it is scalable to prevent it from unrolling below.
1130     if (Node->getValueType(0).isScalableVector()) {
1131       if (SDValue Expanded = TLI.expandFP_TO_INT_SAT(Node, DAG)) {
1132         Results.push_back(Expanded);
1133         return;
1134       }
1135     }
1136     break;
1137   case ISD::SMULFIX:
1138   case ISD::UMULFIX:
1139     if (SDValue Expanded = TLI.expandFixedPointMul(Node, DAG)) {
1140       Results.push_back(Expanded);
1141       return;
1142     }
1143     break;
1144   case ISD::SMULFIXSAT:
1145   case ISD::UMULFIXSAT:
1146     // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet, not sure exactly
1147     // why. Maybe it results in worse codegen compared to the unroll for some
1148     // targets? This should probably be investigated. And if we still prefer to
1149     // unroll an explanation could be helpful.
1150     break;
1151   case ISD::SDIVFIX:
1152   case ISD::UDIVFIX:
1153     ExpandFixedPointDiv(Node, Results);
1154     return;
1155   case ISD::SDIVFIXSAT:
1156   case ISD::UDIVFIXSAT:
1157     break;
1158 #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
1159   case ISD::STRICT_##DAGN:
1160 #include "llvm/IR/ConstrainedOps.def"
1161     ExpandStrictFPOp(Node, Results);
1162     return;
1163   case ISD::VECREDUCE_ADD:
1164   case ISD::VECREDUCE_MUL:
1165   case ISD::VECREDUCE_AND:
1166   case ISD::VECREDUCE_OR:
1167   case ISD::VECREDUCE_XOR:
1168   case ISD::VECREDUCE_SMAX:
1169   case ISD::VECREDUCE_SMIN:
1170   case ISD::VECREDUCE_UMAX:
1171   case ISD::VECREDUCE_UMIN:
1172   case ISD::VECREDUCE_FADD:
1173   case ISD::VECREDUCE_FMUL:
1174   case ISD::VECREDUCE_FMAX:
1175   case ISD::VECREDUCE_FMIN:
1176   case ISD::VECREDUCE_FMAXIMUM:
1177   case ISD::VECREDUCE_FMINIMUM:
1178     Results.push_back(TLI.expandVecReduce(Node, DAG));
1179     return;
1180   case ISD::VECREDUCE_SEQ_FADD:
1181   case ISD::VECREDUCE_SEQ_FMUL:
1182     Results.push_back(TLI.expandVecReduceSeq(Node, DAG));
1183     return;
1184   case ISD::SREM:
1185   case ISD::UREM:
1186     ExpandREM(Node, Results);
1187     return;
1188   case ISD::VP_MERGE:
1189     if (SDValue Expanded = ExpandVP_MERGE(Node)) {
1190       Results.push_back(Expanded);
1191       return;
1192     }
1193     break;
1194   case ISD::FREM:
1195     if (tryExpandVecMathCall(Node, RTLIB::REM_F32, RTLIB::REM_F64,
1196                              RTLIB::REM_F80, RTLIB::REM_F128,
1197                              RTLIB::REM_PPCF128, Results))
1198       return;
1199
1200     break;
1201   case ISD::FSINCOS: {
1202     RTLIB::Libcall LC =
1203         RTLIB::getFSINCOS(Node->getValueType(0).getVectorElementType());
1204     if (DAG.expandMultipleResultFPLibCall(LC, Node, Results))
1205       return;
1206     break;
1207   }
1208   case ISD::VECTOR_COMPRESS:
1209     Results.push_back(TLI.expandVECTOR_COMPRESS(Node, DAG));
1210     return;
1211   case ISD::SCMP:
1212   case ISD::UCMP:
1213     Results.push_back(TLI.expandCMP(Node, DAG));
1214     return;
1215
1216   case ISD::FADD:
1217   case ISD::FMUL:
1218   case ISD::FMA:
1219   case ISD::FDIV:
1220   case ISD::FCEIL:
1221   case ISD::FFLOOR:
1222   case ISD::FNEARBYINT:
1223   case ISD::FRINT:
1224   case ISD::FROUND:
1225   case ISD::FROUNDEVEN:
1226   case ISD::FTRUNC:
1227   case ISD::FSQRT:
1228     if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1229       Results.push_back(Expanded);
1230       return;
1231     }
1232     break;
1233   }
1234
1235   SDValue Unrolled = DAG.UnrollVectorOp(Node);
1236   if (Node->getNumValues() == 1) {
1237     Results.push_back(Unrolled);
1238   } else {
1239     assert(Node->getNumValues() == Unrolled->getNumValues() &&
1240       "VectorLegalizer Expand returned wrong number of results!");
1241     for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I)
1242       Results.push_back(Unrolled.getValue(I));
1243   }
1244 }
1245
1246 SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) {
1247   // Lower a select instruction where the condition is a scalar and the
1248   // operands are vectors. Lower this select to VSELECT and implement it
1249   // using XOR AND OR. The selector bit is broadcasted.
1250   EVT VT = Node->getValueType(0);
1251   SDLoc DL(Node);
1252
1253   SDValue Mask = Node->getOperand(0);
1254   SDValue Op1 = Node->getOperand(1);
1255   SDValue Op2 = Node->getOperand(2);
1256
1257   assert(VT.isVector() && !Mask.getValueType().isVector()
1258          && Op1.getValueType() == Op2.getValueType() && "Invalid type");
1259
1260   // If we can't even use the basic vector operations of
1261   // AND,OR,XOR, we will have to scalarize the op.
1262   // Notice that the operation may be 'promoted' which means that it is
1263   // 'bitcasted' to another type which is handled.
1264   // Also, we need to be able to construct a splat vector using either
1265   // BUILD_VECTOR or SPLAT_VECTOR.
1266   // FIXME: Should we also permit fixed-length SPLAT_VECTOR as a fallback to
1267   // BUILD_VECTOR?
1268   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1269       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1270       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand ||
1271       TLI.getOperationAction(VT.isFixedLengthVector() ? ISD::BUILD_VECTOR
1272                                                       : ISD::SPLAT_VECTOR,
1273                              VT) == TargetLowering::Expand)
1274     return SDValue();
1275
1276   // Generate a mask operand.
1277   EVT MaskTy = VT.changeVectorElementTypeToInteger();
1278
1279   // What is the size of each element in the vector mask.
1280   EVT BitTy = MaskTy.getScalarType();
1281
1282   Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getAllOnesConstant(DL, BitTy),
1283                        DAG.getConstant(0, DL, BitTy));
1284
1285   // Broadcast the mask so that the entire vector is all one or all zero.
1286   Mask = DAG.getSplat(MaskTy, DL, Mask);
1287
1288   // Bitcast the operands to be the same type as the mask.
1289   // This is needed when we select between FP types because
1290   // the mask is a vector of integers.
1291   Op1 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op1);
1292   Op2 = DAG.getNode(ISD::BITCAST, DL, MaskTy, Op2);
1293
1294   SDValue NotMask = DAG.getNOT(DL, Mask, MaskTy);
1295
1296   Op1 = DAG.getNode(ISD::AND, DL, MaskTy, Op1, Mask);
1297   Op2 = DAG.getNode(ISD::AND, DL, MaskTy, Op2, NotMask);
1298   SDValue Val = DAG.getNode(ISD::OR, DL, MaskTy, Op1, Op2);
1299   return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1300 }
1301
1302 SDValue VectorLegalizer::ExpandSEXTINREG(SDNode *Node) {
1303   EVT VT = Node->getValueType(0);
1304
1305   // Make sure that the SRA and SHL instructions are available.
1306   if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand ||
1307       TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand)
1308     return SDValue();
1309
1310   SDLoc DL(Node);
1311   EVT OrigTy = cast<VTSDNode>(Node->getOperand(1))->getVT();
1312
1313   unsigned BW = VT.getScalarSizeInBits();
1314   unsigned OrigBW = OrigTy.getScalarSizeInBits();
1315   SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
1316
1317   SDValue Op = DAG.getNode(ISD::SHL, DL, VT, Node->getOperand(0), ShiftSz);
1318   return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz);
1319 }
1320
1321 // Generically expand a vector anyext in register to a shuffle of the relevant
1322 // lanes into the appropriate locations, with other lanes left undef.
1323 SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDNode *Node) {
1324   SDLoc DL(Node);
1325   EVT VT = Node->getValueType(0);
1326   int NumElements = VT.getVectorNumElements();
1327   SDValue Src = Node->getOperand(0);
1328   EVT SrcVT = Src.getValueType();
1329   int NumSrcElements = SrcVT.getVectorNumElements();
1330
1331   // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1332   // into a larger vector type.
1333   if (SrcVT.bitsLE(VT)) {
1334     assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1335            "ANY_EXTEND_VECTOR_INREG vector size mismatch");
1336     NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1337     SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1338                              NumSrcElements);
1339     Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1340                       Src, DAG.getVectorIdxConstant(0, DL));
1341   }
1342
1343   // Build a base mask of undef shuffles.
1344   SmallVector<int, 16> ShuffleMask;
1345   ShuffleMask.resize(NumSrcElements, -1);
1346
1347   // Place the extended lanes into the correct locations.
1348   int ExtLaneScale = NumSrcElements / NumElements;
1349   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1350   for (int i = 0; i < NumElements; ++i)
1351     ShuffleMask[i * ExtLaneScale + EndianOffset] = i;
1352
1353   return DAG.getNode(
1354       ISD::BITCAST, DL, VT,
1355       DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask));
1356 }
1357
1358 SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDNode *Node) {
1359   SDLoc DL(Node);
1360   EVT VT = Node->getValueType(0);
1361   SDValue Src = Node->getOperand(0);
1362   EVT SrcVT = Src.getValueType();
1363
1364   // First build an any-extend node which can be legalized above when we
1365   // recurse through it.
1366   SDValue Op = DAG.getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Src);
1367
1368   // Now we need sign extend. Do this by shifting the elements. Even if these
1369   // aren't legal operations, they have a better chance of being legalized
1370   // without full scalarization than the sign extension does.
1371   unsigned EltWidth = VT.getScalarSizeInBits();
1372   unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
1373   SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
1374   return DAG.getNode(ISD::SRA, DL, VT,
1375                      DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
1376                      ShiftAmount);
1377 }
1378
1379 // Generically expand a vector zext in register to a shuffle of the relevant
1380 // lanes into the appropriate locations, a blend of zero into the high bits,
1381 // and a bitcast to the wider element type.
1382 SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDNode *Node) {
1383   SDLoc DL(Node);
1384   EVT VT = Node->getValueType(0);
1385   int NumElements = VT.getVectorNumElements();
1386   SDValue Src = Node->getOperand(0);
1387   EVT SrcVT = Src.getValueType();
1388   int NumSrcElements = SrcVT.getVectorNumElements();
1389
1390   // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
1391   // into a larger vector type.
1392   if (SrcVT.bitsLE(VT)) {
1393     assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
1394            "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
1395     NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
1396     SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
1397                              NumSrcElements);
1398     Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT),
1399                       Src, DAG.getVectorIdxConstant(0, DL));
1400   }
1401
1402   // Build up a zero vector to blend into this one.
1403   SDValue Zero = DAG.getConstant(0, DL, SrcVT);
1404
1405   // Shuffle the incoming lanes into the correct position, and pull all other
1406   // lanes from the zero vector.
1407   auto ShuffleMask = llvm::to_vector<16>(llvm::seq<int>(0, NumSrcElements));
1408
1409   int ExtLaneScale = NumSrcElements / NumElements;
1410   int EndianOffset = DAG.getDataLayout().isBigEndian() ? ExtLaneScale - 1 : 0;
1411   for (int i = 0; i < NumElements; ++i)
1412     ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i;
1413
1414   return DAG.getNode(ISD::BITCAST, DL, VT,
1415                      DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask));
1416 }
1417
1418 static void createBSWAPShuffleMask(EVT VT, SmallVectorImpl<int> &ShuffleMask) {
1419   int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;
1420   for (int I = 0, E = VT.getVectorNumElements(); I != E; ++I)
1421     for (int J = ScalarSizeInBytes - 1; J >= 0; --J)
1422       ShuffleMask.push_back((I * ScalarSizeInBytes) + J);
1423 }
1424
1425 SDValue VectorLegalizer::ExpandBSWAP(SDNode *Node) {
1426   EVT VT = Node->getValueType(0);
1427
1428   // Scalable vectors can't use shuffle expansion.
1429   if (VT.isScalableVector())
1430     return TLI.expandBSWAP(Node, DAG);
1431
1432   // Generate a byte wise shuffle mask for the BSWAP.
1433   SmallVector<int, 16> ShuffleMask;
1434   createBSWAPShuffleMask(VT, ShuffleMask);
1435   EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, ShuffleMask.size());
1436
1437   // Only emit a shuffle if the mask is legal.
1438   if (TLI.isShuffleMaskLegal(ShuffleMask, ByteVT)) {
1439     SDLoc DL(Node);
1440     SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1441     Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT), ShuffleMask);
1442     return DAG.getNode(ISD::BITCAST, DL, VT, Op);
1443   }
1444
1445   // If we have the appropriate vector bit operations, it is better to use them
1446   // than unrolling and expanding each component.
1447   if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1448       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1449       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1450       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1451     return TLI.expandBSWAP(Node, DAG);
1452
1453   // Otherwise let the caller unroll.
1454   return SDValue();
1455 }
1456
1457 SDValue VectorLegalizer::ExpandBITREVERSE(SDNode *Node) {
1458   EVT VT = Node->getValueType(0);
1459
1460   // We can't unroll or use shuffles for scalable vectors.
1461   if (VT.isScalableVector())
1462     return TLI.expandBITREVERSE(Node, DAG);
1463
1464   // If we have the scalar operation, it's probably cheaper to unroll it.
1465   if (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, VT.getScalarType()))
1466     return SDValue();
1467
1468   // If the vector element width is a whole number of bytes, test if its legal
1469   // to BSWAP shuffle the bytes and then perform the BITREVERSE on the byte
1470   // vector. This greatly reduces the number of bit shifts necessary.
1471   unsigned ScalarSizeInBits = VT.getScalarSizeInBits();
1472   if (ScalarSizeInBits > 8 && (ScalarSizeInBits % 8) == 0) {
1473     SmallVector<int, 16> BSWAPMask;
1474     createBSWAPShuffleMask(VT, BSWAPMask);
1475
1476     EVT ByteVT = EVT::getVectorVT(*DAG.getContext(), MVT::i8, BSWAPMask.size());
1477     if (TLI.isShuffleMaskLegal(BSWAPMask, ByteVT) &&
1478         (TLI.isOperationLegalOrCustom(ISD::BITREVERSE, ByteVT) ||
1479          (TLI.isOperationLegalOrCustom(ISD::SHL, ByteVT) &&
1480           TLI.isOperationLegalOrCustom(ISD::SRL, ByteVT) &&
1481           TLI.isOperationLegalOrCustomOrPromote(ISD::AND, ByteVT) &&
1482           TLI.isOperationLegalOrCustomOrPromote(ISD::OR, ByteVT)))) {
1483       SDLoc DL(Node);
1484       SDValue Op = DAG.getNode(ISD::BITCAST, DL, ByteVT, Node->getOperand(0));
1485       Op = DAG.getVectorShuffle(ByteVT, DL, Op, DAG.getUNDEF(ByteVT),
1486                                 BSWAPMask);
1487       Op = DAG.getNode(ISD::BITREVERSE, DL, ByteVT, Op);
1488       Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
1489       return Op;
1490     }
1491   }
1492
1493   // If we have the appropriate vector bit operations, it is better to use them
1494   // than unrolling and expanding each component.
1495   if (TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
1496       TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
1497       TLI.isOperationLegalOrCustomOrPromote(ISD::AND, VT) &&
1498       TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT))
1499     return TLI.expandBITREVERSE(Node, DAG);
1500
1501   // Otherwise unroll.
1502   return SDValue();
1503 }
1504
1505 SDValue VectorLegalizer::ExpandVSELECT(SDNode *Node) {
1506   // Implement VSELECT in terms of XOR, AND, OR
1507   // on platforms which do not support blend natively.
1508   SDLoc DL(Node);
1509
1510   SDValue Mask = Node->getOperand(0);
1511   SDValue Op1 = Node->getOperand(1);
1512   SDValue Op2 = Node->getOperand(2);
1513
1514   EVT VT = Mask.getValueType();
1515
1516   // If we can't even use the basic vector operations of
1517   // AND,OR,XOR, we will have to scalarize the op.
1518   // Notice that the operation may be 'promoted' which means that it is
1519   // 'bitcasted' to another type which is handled.
1520   if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand ||
1521       TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand ||
1522       TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand)
1523     return SDValue();
1524
1525   // This operation also isn't safe with AND, OR, XOR when the boolean type is
1526   // 0/1 and the select operands aren't also booleans, as we need an all-ones
1527   // vector constant to mask with.
1528   // FIXME: Sign extend 1 to all ones if that's legal on the target.
1529   auto BoolContents = TLI.getBooleanContents(Op1.getValueType());
1530   if (BoolContents != TargetLowering::ZeroOrNegativeOneBooleanContent &&
1531       !(BoolContents == TargetLowering::ZeroOrOneBooleanContent &&
1532         Op1.getValueType().getVectorElementType() == MVT::i1))
1533     return SDValue();
1534
1535   // If the mask and the type are different sizes, unroll the vector op. This
1536   // can occur when getSetCCResultType returns something that is different in
1537   // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
1538   if (VT.getSizeInBits() != Op1.getValueSizeInBits())
1539     return SDValue();
1540
1541   // Bitcast the operands to be the same type as the mask.
1542   // This is needed when we select between FP types because
1543   // the mask is a vector of integers.
1544   Op1 = DAG.getNode(ISD::BITCAST, DL, VT, Op1);
1545   Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
1546
1547   SDValue NotMask = DAG.getNOT(DL, Mask, VT);
1548
1549   Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
1550   Op2 = DAG.getNode(ISD::AND, DL, VT, Op2, NotMask);
1551   SDValue Val = DAG.getNode(ISD::OR, DL, VT, Op1, Op2);
1552   return DAG.getNode(ISD::BITCAST, DL, Node->getValueType(0), Val);
1553 }
1554
1555 SDValue VectorLegalizer::ExpandVP_SELECT(SDNode *Node) {
1556   // Implement VP_SELECT in terms of VP_XOR, VP_AND and VP_OR on platforms which
1557   // do not support it natively.
1558   SDLoc DL(Node);
1559
1560   SDValue Mask = Node->getOperand(0);
1561   SDValue Op1 = Node->getOperand(1);
1562   SDValue Op2 = Node->getOperand(2);
1563   SDValue EVL = Node->getOperand(3);
1564
1565   EVT VT = Mask.getValueType();
1566
1567   // If we can't even use the basic vector operations of
1568   // VP_AND,VP_OR,VP_XOR, we will have to scalarize the op.
1569   if (TLI.getOperationAction(ISD::VP_AND, VT) == TargetLowering::Expand ||
1570       TLI.getOperationAction(ISD::VP_XOR, VT) == TargetLowering::Expand ||
1571       TLI.getOperationAction(ISD::VP_OR, VT) == TargetLowering::Expand)
1572     return SDValue();
1573
1574   // This operation also isn't safe when the operands aren't also booleans.
1575   if (Op1.getValueType().getVectorElementType() != MVT::i1)
1576     return SDValue();
1577
1578   SDValue Ones = DAG.getAllOnesConstant(DL, VT);
1579   SDValue NotMask = DAG.getNode(ISD::VP_XOR, DL, VT, Mask, Ones, Ones, EVL);
1580
1581   Op1 = DAG.getNode(ISD::VP_AND, DL, VT, Op1, Mask, Ones, EVL);
1582   Op2 = DAG.getNode(ISD::VP_AND, DL, VT, Op2, NotMask, Ones, EVL);
1583   return DAG.getNode(ISD::VP_OR, DL, VT, Op1, Op2, Ones, EVL);
1584 }
1585
1586 SDValue VectorLegalizer::ExpandVP_MERGE(SDNode *Node) {
1587   // Implement VP_MERGE in terms of VSELECT. Construct a mask where vector
1588   // indices less than the EVL/pivot are true. Combine that with the original
1589   // mask for a full-length mask. Use a full-length VSELECT to select between
1590   // the true and false values.
1591   SDLoc DL(Node);
1592
1593   SDValue Mask = Node->getOperand(0);
1594   SDValue Op1 = Node->getOperand(1);
1595   SDValue Op2 = Node->getOperand(2);
1596   SDValue EVL = Node->getOperand(3);
1597
1598   EVT MaskVT = Mask.getValueType();
1599   bool IsFixedLen = MaskVT.isFixedLengthVector();
1600
1601   EVT EVLVecVT = EVT::getVectorVT(*DAG.getContext(), EVL.getValueType(),
1602                                   MaskVT.getVectorElementCount());
1603
1604   // If we can't construct the EVL mask efficiently, it's better to unroll.
1605   if ((IsFixedLen &&
1606        !TLI.isOperationLegalOrCustom(ISD::BUILD_VECTOR, EVLVecVT)) ||
1607       (!IsFixedLen &&
1608        (!TLI.isOperationLegalOrCustom(ISD::STEP_VECTOR, EVLVecVT) ||
1609         !TLI.isOperationLegalOrCustom(ISD::SPLAT_VECTOR, EVLVecVT))))
1610     return SDValue();
1611
1612   // If using a SETCC would result in a different type than the mask type,
1613   // unroll.
1614   if (TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
1615                              EVLVecVT) != MaskVT)
1616     return SDValue();
1617
1618   SDValue StepVec = DAG.getStepVector(DL, EVLVecVT);
1619   SDValue SplatEVL = DAG.getSplat(EVLVecVT, DL, EVL);
1620   SDValue EVLMask =
1621       DAG.getSetCC(DL, MaskVT, StepVec, SplatEVL, ISD::CondCode::SETULT);
1622
1623   SDValue FullMask = DAG.getNode(ISD::AND, DL, MaskVT, Mask, EVLMask);
1624   return DAG.getSelect(DL, Node->getValueType(0), FullMask, Op1, Op2);
1625 }
1626
1627 SDValue VectorLegalizer::ExpandVP_REM(SDNode *Node) {
1628   // Implement VP_SREM/UREM in terms of VP_SDIV/VP_UDIV, VP_MUL, VP_SUB.
1629   EVT VT = Node->getValueType(0);
1630
1631   unsigned DivOpc = Node->getOpcode() == ISD::VP_SREM ? ISD::VP_SDIV : ISD::VP_UDIV;
1632
1633   if (!TLI.isOperationLegalOrCustom(DivOpc, VT) ||
1634       !TLI.isOperationLegalOrCustom(ISD::VP_MUL, VT) ||
1635       !TLI.isOperationLegalOrCustom(ISD::VP_SUB, VT))
1636     return SDValue();
1637
1638   SDLoc DL(Node);
1639
1640   SDValue Dividend = Node->getOperand(0);
1641   SDValue Divisor = Node->getOperand(1);
1642   SDValue Mask = Node->getOperand(2);
1643   SDValue EVL = Node->getOperand(3);
1644
1645   // X % Y -> X-X/Y*Y
1646   SDValue Div = DAG.getNode(DivOpc, DL, VT, Dividend, Divisor, Mask, EVL);
1647   SDValue Mul = DAG.getNode(ISD::VP_MUL, DL, VT, Divisor, Div, Mask, EVL);
1648   return DAG.getNode(ISD::VP_SUB, DL, VT, Dividend, Mul, Mask, EVL);
1649 }
1650
1651 SDValue VectorLegalizer::ExpandVP_FNEG(SDNode *Node) {
1652   EVT VT = Node->getValueType(0);
1653   EVT IntVT = VT.changeVectorElementTypeToInteger();
1654
1655   if (!TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1656     return SDValue();
1657
1658   SDValue Mask = Node->getOperand(1);
1659   SDValue EVL = Node->getOperand(2);
1660
1661   SDLoc DL(Node);
1662   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1663   SDValue SignMask = DAG.getConstant(
1664       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1665   SDValue Xor = DAG.getNode(ISD::VP_XOR, DL, IntVT, Cast, SignMask, Mask, EVL);
1666   return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1667 }
1668
1669 SDValue VectorLegalizer::ExpandVP_FABS(SDNode *Node) {
1670   EVT VT = Node->getValueType(0);
1671   EVT IntVT = VT.changeVectorElementTypeToInteger();
1672
1673   if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT))
1674     return SDValue();
1675
1676   SDValue Mask = Node->getOperand(1);
1677   SDValue EVL = Node->getOperand(2);
1678
1679   SDLoc DL(Node);
1680   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1681   SDValue ClearSignMask = DAG.getConstant(
1682       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1683   SDValue ClearSign =
1684       DAG.getNode(ISD::VP_AND, DL, IntVT, Cast, ClearSignMask, Mask, EVL);
1685   return DAG.getNode(ISD::BITCAST, DL, VT, ClearSign);
1686 }
1687
1688 SDValue VectorLegalizer::ExpandVP_FCOPYSIGN(SDNode *Node) {
1689   EVT VT = Node->getValueType(0);
1690
1691   if (VT != Node->getOperand(1).getValueType())
1692     return SDValue();
1693
1694   EVT IntVT = VT.changeVectorElementTypeToInteger();
1695   if (!TLI.isOperationLegalOrCustom(ISD::VP_AND, IntVT) ||
1696       !TLI.isOperationLegalOrCustom(ISD::VP_XOR, IntVT))
1697     return SDValue();
1698
1699   SDValue Mask = Node->getOperand(2);
1700   SDValue EVL = Node->getOperand(3);
1701
1702   SDLoc DL(Node);
1703   SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1704   SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1705
1706   SDValue SignMask = DAG.getConstant(
1707       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1708   SDValue SignBit =
1709       DAG.getNode(ISD::VP_AND, DL, IntVT, Sign, SignMask, Mask, EVL);
1710
1711   SDValue ClearSignMask = DAG.getConstant(
1712       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1713   SDValue ClearedSign =
1714       DAG.getNode(ISD::VP_AND, DL, IntVT, Mag, ClearSignMask, Mask, EVL);
1715
1716   SDValue CopiedSign = DAG.getNode(ISD::VP_OR, DL, IntVT, ClearedSign, SignBit,
1717                                    Mask, EVL, SDNodeFlags::Disjoint);
1718
1719   return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1720 }
1721
1722 void VectorLegalizer::ExpandFP_TO_UINT(SDNode *Node,
1723                                        SmallVectorImpl<SDValue> &Results) {
1724   // Attempt to expand using TargetLowering.
1725   SDValue Result, Chain;
1726   if (TLI.expandFP_TO_UINT(Node, Result, Chain, DAG)) {
1727     Results.push_back(Result);
1728     if (Node->isStrictFPOpcode())
1729       Results.push_back(Chain);
1730     return;
1731   }
1732
1733   // Otherwise go ahead and unroll.
1734   if (Node->isStrictFPOpcode()) {
1735     UnrollStrictFPOp(Node, Results);
1736     return;
1737   }
1738
1739   Results.push_back(DAG.UnrollVectorOp(Node));
1740 }
1741
1742 void VectorLegalizer::ExpandUINT_TO_FLOAT(SDNode *Node,
1743                                           SmallVectorImpl<SDValue> &Results) {
1744   bool IsStrict = Node->isStrictFPOpcode();
1745   unsigned OpNo = IsStrict ? 1 : 0;
1746   SDValue Src = Node->getOperand(OpNo);
1747   EVT SrcVT = Src.getValueType();
1748   EVT DstVT = Node->getValueType(0);
1749   SDLoc DL(Node);
1750
1751   // Attempt to expand using TargetLowering.
1752   SDValue Result;
1753   SDValue Chain;
1754   if (TLI.expandUINT_TO_FP(Node, Result, Chain, DAG)) {
1755     Results.push_back(Result);
1756     if (IsStrict)
1757       Results.push_back(Chain);
1758     return;
1759   }
1760
1761   // Make sure that the SINT_TO_FP and SRL instructions are available.
1762   if (((!IsStrict && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) ==
1763                          TargetLowering::Expand) ||
1764        (IsStrict && TLI.getOperationAction(ISD::STRICT_SINT_TO_FP, SrcVT) ==
1765                         TargetLowering::Expand)) ||
1766       TLI.getOperationAction(ISD::SRL, SrcVT) == TargetLowering::Expand) {
1767     if (IsStrict) {
1768       UnrollStrictFPOp(Node, Results);
1769       return;
1770     }
1771
1772     Results.push_back(DAG.UnrollVectorOp(Node));
1773     return;
1774   }
1775
1776   unsigned BW = SrcVT.getScalarSizeInBits();
1777   assert((BW == 64 || BW == 32) &&
1778          "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
1779
1780   SDValue HalfWord = DAG.getConstant(BW / 2, DL, SrcVT);
1781
1782   // Constants to clear the upper part of the word.
1783   // Notice that we can also use SHL+SHR, but using a constant is slightly
1784   // faster on x86.
1785   uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
1786   SDValue HalfWordMask = DAG.getConstant(HWMask, DL, SrcVT);
1787
1788   // Two to the power of half-word-size.
1789   SDValue TWOHW = DAG.getConstantFP(1ULL << (BW / 2), DL, DstVT);
1790
1791   // Clear upper part of LO, lower HI
1792   SDValue HI = DAG.getNode(ISD::SRL, DL, SrcVT, Src, HalfWord);
1793   SDValue LO = DAG.getNode(ISD::AND, DL, SrcVT, Src, HalfWordMask);
1794
1795   if (IsStrict) {
1796     // Convert hi and lo to floats
1797     // Convert the hi part back to the upper values
1798     // TODO: Can any fast-math-flags be set on these nodes?
1799     SDValue fHI = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
1800                               {Node->getOperand(0), HI});
1801     fHI = DAG.getNode(ISD::STRICT_FMUL, DL, {DstVT, MVT::Other},
1802                       {fHI.getValue(1), fHI, TWOHW});
1803     SDValue fLO = DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {DstVT, MVT::Other},
1804                               {Node->getOperand(0), LO});
1805
1806     SDValue TF = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, fHI.getValue(1),
1807                              fLO.getValue(1));
1808
1809     // Add the two halves
1810     SDValue Result =
1811         DAG.getNode(ISD::STRICT_FADD, DL, {DstVT, MVT::Other}, {TF, fHI, fLO});
1812
1813     Results.push_back(Result);
1814     Results.push_back(Result.getValue(1));
1815     return;
1816   }
1817
1818   // Convert hi and lo to floats
1819   // Convert the hi part back to the upper values
1820   // TODO: Can any fast-math-flags be set on these nodes?
1821   SDValue fHI = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, HI);
1822   fHI = DAG.getNode(ISD::FMUL, DL, DstVT, fHI, TWOHW);
1823   SDValue fLO = DAG.getNode(ISD::SINT_TO_FP, DL, DstVT, LO);
1824
1825   // Add the two halves
1826   Results.push_back(DAG.getNode(ISD::FADD, DL, DstVT, fHI, fLO));
1827 }
1828
1829 SDValue VectorLegalizer::ExpandFNEG(SDNode *Node) {
1830   EVT VT = Node->getValueType(0);
1831   EVT IntVT = VT.changeVectorElementTypeToInteger();
1832
1833   if (!TLI.isOperationLegalOrCustom(ISD::XOR, IntVT))
1834     return SDValue();
1835
1836   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1837   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1838       !VT.isScalableVector())
1839     return SDValue();
1840
1841   SDLoc DL(Node);
1842   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1843   SDValue SignMask = DAG.getConstant(
1844       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1845   SDValue Xor = DAG.getNode(ISD::XOR, DL, IntVT, Cast, SignMask);
1846   return DAG.getNode(ISD::BITCAST, DL, VT, Xor);
1847 }
1848
1849 SDValue VectorLegalizer::ExpandFABS(SDNode *Node) {
1850   EVT VT = Node->getValueType(0);
1851   EVT IntVT = VT.changeVectorElementTypeToInteger();
1852
1853   if (!TLI.isOperationLegalOrCustom(ISD::AND, IntVT))
1854     return SDValue();
1855
1856   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1857   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1858       !VT.isScalableVector())
1859     return SDValue();
1860
1861   SDLoc DL(Node);
1862   SDValue Cast = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1863   SDValue ClearSignMask = DAG.getConstant(
1864       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1865   SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Cast, ClearSignMask);
1866   return DAG.getNode(ISD::BITCAST, DL, VT, ClearedSign);
1867 }
1868
1869 SDValue VectorLegalizer::ExpandFCOPYSIGN(SDNode *Node) {
1870   EVT VT = Node->getValueType(0);
1871   EVT IntVT = VT.changeVectorElementTypeToInteger();
1872
1873   if (VT != Node->getOperand(1).getValueType() ||
1874       !TLI.isOperationLegalOrCustom(ISD::AND, IntVT) ||
1875       !TLI.isOperationLegalOrCustom(ISD::OR, IntVT))
1876     return SDValue();
1877
1878   // FIXME: The FSUB check is here to force unrolling v1f64 vectors on AArch64.
1879   if (!TLI.isOperationLegalOrCustomOrPromote(ISD::FSUB, VT) &&
1880       !VT.isScalableVector())
1881     return SDValue();
1882
1883   SDLoc DL(Node);
1884   SDValue Mag = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(0));
1885   SDValue Sign = DAG.getNode(ISD::BITCAST, DL, IntVT, Node->getOperand(1));
1886
1887   SDValue SignMask = DAG.getConstant(
1888       APInt::getSignMask(IntVT.getScalarSizeInBits()), DL, IntVT);
1889   SDValue SignBit = DAG.getNode(ISD::AND, DL, IntVT, Sign, SignMask);
1890
1891   SDValue ClearSignMask = DAG.getConstant(
1892       APInt::getSignedMaxValue(IntVT.getScalarSizeInBits()), DL, IntVT);
1893   SDValue ClearedSign = DAG.getNode(ISD::AND, DL, IntVT, Mag, ClearSignMask);
1894
1895   SDValue CopiedSign = DAG.getNode(ISD::OR, DL, IntVT, ClearedSign, SignBit,
1896                                    SDNodeFlags::Disjoint);
1897
1898   return DAG.getNode(ISD::BITCAST, DL, VT, CopiedSign);
1899 }
1900
1901 void VectorLegalizer::ExpandFSUB(SDNode *Node,
1902                                  SmallVectorImpl<SDValue> &Results) {
1903   // For floating-point values, (a-b) is the same as a+(-b). If FNEG is legal,
1904   // we can defer this to operation legalization where it will be lowered as
1905   // a+(-b).
1906   EVT VT = Node->getValueType(0);
1907   if (TLI.isOperationLegalOrCustom(ISD::FNEG, VT) &&
1908       TLI.isOperationLegalOrCustom(ISD::FADD, VT))
1909     return; // Defer to LegalizeDAG
1910
1911   if (SDValue Expanded = TLI.expandVectorNaryOpBySplitting(Node, DAG)) {
1912     Results.push_back(Expanded);
1913     return;
1914   }
1915
1916   SDValue Tmp = DAG.UnrollVectorOp(Node);
1917   Results.push_back(Tmp);
1918 }
1919
1920 void VectorLegalizer::ExpandSETCC(SDNode *Node,
1921                                   SmallVectorImpl<SDValue> &Results) {
1922   bool NeedInvert = false;
1923   bool IsVP = Node->getOpcode() == ISD::VP_SETCC;
1924   bool IsStrict = Node->getOpcode() == ISD::STRICT_FSETCC ||
1925                   Node->getOpcode() == ISD::STRICT_FSETCCS;
1926   bool IsSignaling = Node->getOpcode() == ISD::STRICT_FSETCCS;
1927   unsigned Offset = IsStrict ? 1 : 0;
1928
1929   SDValue Chain = IsStrict ? Node->getOperand(0) : SDValue();
1930   SDValue LHS = Node->getOperand(0 + Offset);
1931   SDValue RHS = Node->getOperand(1 + Offset);
1932   SDValue CC = Node->getOperand(2 + Offset);
1933
1934   MVT OpVT = LHS.getSimpleValueType();
1935   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
1936
1937   if (TLI.getCondCodeAction(CCCode, OpVT) != TargetLowering::Expand) {
1938     if (IsStrict) {
1939       UnrollStrictFPOp(Node, Results);
1940       return;
1941     }
1942     Results.push_back(UnrollVSETCC(Node));
1943     return;
1944   }
1945
1946   SDValue Mask, EVL;
1947   if (IsVP) {
1948     Mask = Node->getOperand(3 + Offset);
1949     EVL = Node->getOperand(4 + Offset);
1950   }
1951
1952   SDLoc dl(Node);
1953   bool Legalized =
1954       TLI.LegalizeSetCCCondCode(DAG, Node->getValueType(0), LHS, RHS, CC, Mask,
1955                                 EVL, NeedInvert, dl, Chain, IsSignaling);
1956
1957   if (Legalized) {
1958     // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
1959     // condition code, create a new SETCC node.
1960     if (CC.getNode()) {
1961       if (IsStrict) {
1962         LHS = DAG.getNode(Node->getOpcode(), dl, Node->getVTList(),
1963                           {Chain, LHS, RHS, CC}, Node->getFlags());
1964         Chain = LHS.getValue(1);
1965       } else if (IsVP) {
1966         LHS = DAG.getNode(ISD::VP_SETCC, dl, Node->getValueType(0),
1967                           {LHS, RHS, CC, Mask, EVL}, Node->getFlags());
1968       } else {
1969         LHS = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), LHS, RHS, CC,
1970                           Node->getFlags());
1971       }
1972     }
1973
1974     // If we expanded the SETCC by inverting the condition code, then wrap
1975     // the existing SETCC in a NOT to restore the intended condition.
1976     if (NeedInvert) {
1977       if (!IsVP)
1978         LHS = DAG.getLogicalNOT(dl, LHS, LHS->getValueType(0));
1979       else
1980         LHS = DAG.getVPLogicalNOT(dl, LHS, Mask, EVL, LHS->getValueType(0));
1981     }
1982   } else {
1983     assert(!IsStrict && "Don't know how to expand for strict nodes.");
1984
1985     // Otherwise, SETCC for the given comparison type must be completely
1986     // illegal; expand it into a SELECT_CC.
1987     EVT VT = Node->getValueType(0);
1988     LHS =
1989         DAG.getNode(ISD::SELECT_CC, dl, VT, LHS, RHS,
1990                     DAG.getBoolConstant(true, dl, VT, LHS.getValueType()),
1991                     DAG.getBoolConstant(false, dl, VT, LHS.getValueType()), CC);
1992     LHS->setFlags(Node->getFlags());
1993   }
1994
1995   Results.push_back(LHS);
1996   if (IsStrict)
1997     Results.push_back(Chain);
1998 }
1999
2000 void VectorLegalizer::ExpandUADDSUBO(SDNode *Node,
2001                                      SmallVectorImpl<SDValue> &Results) {
2002   SDValue Result, Overflow;
2003   TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
2004   Results.push_back(Result);
2005   Results.push_back(Overflow);
2006 }
2007
2008 void VectorLegalizer::ExpandSADDSUBO(SDNode *Node,
2009                                      SmallVectorImpl<SDValue> &Results) {
2010   SDValue Result, Overflow;
2011   TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
2012   Results.push_back(Result);
2013   Results.push_back(Overflow);
2014 }
2015
2016 void VectorLegalizer::ExpandMULO(SDNode *Node,
2017                                  SmallVectorImpl<SDValue> &Results) {
2018   SDValue Result, Overflow;
2019   if (!TLI.expandMULO(Node, Result, Overflow, DAG))
2020     std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Node);
2021
2022   Results.push_back(Result);
2023   Results.push_back(Overflow);
2024 }
2025
2026 void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
2027                                           SmallVectorImpl<SDValue> &Results) {
2028   SDNode *N = Node;
2029   if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
2030           N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
2031     Results.push_back(Expanded);
2032 }
2033
2034 void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
2035                                        SmallVectorImpl<SDValue> &Results) {
2036   if (Node->getOpcode() == ISD::STRICT_UINT_TO_FP) {
2037     ExpandUINT_TO_FLOAT(Node, Results);
2038     return;
2039   }
2040   if (Node->getOpcode() == ISD::STRICT_FP_TO_UINT) {
2041     ExpandFP_TO_UINT(Node, Results);
2042     return;
2043   }
2044
2045   if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2046       Node->getOpcode() == ISD::STRICT_FSETCCS) {
2047     ExpandSETCC(Node, Results);
2048     return;
2049   }
2050
2051   UnrollStrictFPOp(Node, Results);
2052 }
2053
2054 void VectorLegalizer::ExpandREM(SDNode *Node,
2055                                 SmallVectorImpl<SDValue> &Results) {
2056   assert((Node->getOpcode() == ISD::SREM || Node->getOpcode() == ISD::UREM) &&
2057          "Expected REM node");
2058
2059   SDValue Result;
2060   if (!TLI.expandREM(Node, Result, DAG))
2061     Result = DAG.UnrollVectorOp(Node);
2062   Results.push_back(Result);
2063 }
2064
2065 // Try to expand libm nodes into vector math routine calls. Callers provide the
2066 // LibFunc equivalent of the passed in Node, which is used to lookup mappings
2067 // within TargetLibraryInfo. The only mappings considered are those where the
2068 // result and all operands are the same vector type. While predicated nodes are
2069 // not supported, we will emit calls to masked routines by passing in an all
2070 // true mask.
2071 bool VectorLegalizer::tryExpandVecMathCall(SDNode *Node, RTLIB::Libcall LC,
2072                                            SmallVectorImpl<SDValue> &Results) {
2073   // Chain must be propagated but currently strict fp operations are down
2074   // converted to their none strict counterpart.
2075   assert(!Node->isStrictFPOpcode() && "Unexpected strict fp operation!");
2076
2077   const char *LCName = TLI.getLibcallName(LC);
2078   if (!LCName)
2079     return false;
2080   LLVM_DEBUG(dbgs() << "Looking for vector variant of " << LCName << "\n");
2081
2082   EVT VT = Node->getValueType(0);
2083   ElementCount VL = VT.getVectorElementCount();
2084
2085   // Lookup a vector function equivalent to the specified libcall. Prefer
2086   // unmasked variants but we will generate a mask if need be.
2087   const TargetLibraryInfo &TLibInfo = DAG.getLibInfo();
2088   const VecDesc *VD = TLibInfo.getVectorMappingInfo(LCName, VL, false);
2089   if (!VD)
2090     VD = TLibInfo.getVectorMappingInfo(LCName, VL, /*Masked=*/true);
2091   if (!VD)
2092     return false;
2093
2094   LLVMContext *Ctx = DAG.getContext();
2095   Type *Ty = VT.getTypeForEVT(*Ctx);
2096   Type *ScalarTy = Ty->getScalarType();
2097
2098   // Construct a scalar function type based on Node's operands.
2099   SmallVector<Type *, 8> ArgTys;
2100   for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
2101     assert(Node->getOperand(i).getValueType() == VT &&
2102            "Expected matching vector types!");
2103     ArgTys.push_back(ScalarTy);
2104   }
2105   FunctionType *ScalarFTy = FunctionType::get(ScalarTy, ArgTys, false);
2106
2107   // Generate call information for the vector function.
2108   const std::string MangledName = VD->getVectorFunctionABIVariantString();
2109   auto OptVFInfo = VFABI::tryDemangleForVFABI(MangledName, ScalarFTy);
2110   if (!OptVFInfo)
2111     return false;
2112
2113   LLVM_DEBUG(dbgs() << "Found vector variant " << VD->getVectorFnName()
2114                     << "\n");
2115
2116   // Sanity check just in case OptVFInfo has unexpected parameters.
2117   if (OptVFInfo->Shape.Parameters.size() !=
2118       Node->getNumOperands() + VD->isMasked())
2119     return false;
2120
2121   // Collect vector call operands.
2122
2123   SDLoc DL(Node);
2124   TargetLowering::ArgListTy Args;
2125   TargetLowering::ArgListEntry Entry;
2126   Entry.IsSExt = false;
2127   Entry.IsZExt = false;
2128
2129   unsigned OpNum = 0;
2130   for (auto &VFParam : OptVFInfo->Shape.Parameters) {
2131     if (VFParam.ParamKind == VFParamKind::GlobalPredicate) {
2132       EVT MaskVT = TLI.getSetCCResultType(DAG.getDataLayout(), *Ctx, VT);
2133       Entry.Node = DAG.getBoolConstant(true, DL, MaskVT, VT);
2134       Entry.Ty = MaskVT.getTypeForEVT(*Ctx);
2135       Args.push_back(Entry);
2136       continue;
2137     }
2138
2139     // Only vector operands are supported.
2140     if (VFParam.ParamKind != VFParamKind::Vector)
2141       return false;
2142
2143     Entry.Node = Node->getOperand(OpNum++);
2144     Entry.Ty = Ty;
2145     Args.push_back(Entry);
2146   }
2147
2148   // Emit a call to the vector function.
2149   SDValue Callee = DAG.getExternalSymbol(VD->getVectorFnName().data(),
2150                                          TLI.getPointerTy(DAG.getDataLayout()));
2151   TargetLowering::CallLoweringInfo CLI(DAG);
2152   CLI.setDebugLoc(DL)
2153       .setChain(DAG.getEntryNode())
2154       .setLibCallee(CallingConv::C, Ty, Callee, std::move(Args));
2155
2156   std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
2157   Results.push_back(CallResult.first);
2158   return true;
2159 }
2160
2161 /// Try to expand the node to a vector libcall based on the result type.
2162 bool VectorLegalizer::tryExpandVecMathCall(
2163     SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
2164     RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
2165     RTLIB::Libcall Call_PPCF128, SmallVectorImpl<SDValue> &Results) {
2166   RTLIB::Libcall LC = RTLIB::getFPLibCall(
2167       Node->getValueType(0).getVectorElementType(), Call_F32, Call_F64,
2168       Call_F80, Call_F128, Call_PPCF128);
2169
2170   if (LC == RTLIB::UNKNOWN_LIBCALL)
2171     return false;
2172
2173   return tryExpandVecMathCall(Node, LC, Results);
2174 }
2175
2176 void VectorLegalizer::UnrollStrictFPOp(SDNode *Node,
2177                                        SmallVectorImpl<SDValue> &Results) {
2178   EVT VT = Node->getValueType(0);
2179   EVT EltVT = VT.getVectorElementType();
2180   unsigned NumElems = VT.getVectorNumElements();
2181   unsigned NumOpers = Node->getNumOperands();
2182   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2183
2184   EVT TmpEltVT = EltVT;
2185   if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2186       Node->getOpcode() == ISD::STRICT_FSETCCS)
2187     TmpEltVT = TLI.getSetCCResultType(DAG.getDataLayout(),
2188                                       *DAG.getContext(), TmpEltVT);
2189
2190   EVT ValueVTs[] = {TmpEltVT, MVT::Other};
2191   SDValue Chain = Node->getOperand(0);
2192   SDLoc dl(Node);
2193
2194   SmallVector<SDValue, 32> OpValues;
2195   SmallVector<SDValue, 32> OpChains;
2196   for (unsigned i = 0; i < NumElems; ++i) {
2197     SmallVector<SDValue, 4> Opers;
2198     SDValue Idx = DAG.getVectorIdxConstant(i, dl);
2199
2200     // The Chain is the first operand.
2201     Opers.push_back(Chain);
2202
2203     // Now process the remaining operands.
2204     for (unsigned j = 1; j < NumOpers; ++j) {
2205       SDValue Oper = Node->getOperand(j);
2206       EVT OperVT = Oper.getValueType();
2207
2208       if (OperVT.isVector())
2209         Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
2210                            OperVT.getVectorElementType(), Oper, Idx);
2211
2212       Opers.push_back(Oper);
2213     }
2214
2215     SDValue ScalarOp = DAG.getNode(Node->getOpcode(), dl, ValueVTs, Opers);
2216     SDValue ScalarResult = ScalarOp.getValue(0);
2217     SDValue ScalarChain = ScalarOp.getValue(1);
2218
2219     if (Node->getOpcode() == ISD::STRICT_FSETCC ||
2220         Node->getOpcode() == ISD::STRICT_FSETCCS)
2221       ScalarResult = DAG.getSelect(dl, EltVT, ScalarResult,
2222                                    DAG.getAllOnesConstant(dl, EltVT),
2223                                    DAG.getConstant(0, dl, EltVT));
2224
2225     OpValues.push_back(ScalarResult);
2226     OpChains.push_back(ScalarChain);
2227   }
2228
2229   SDValue Result = DAG.getBuildVector(VT, dl, OpValues);
2230   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
2231
2232   Results.push_back(Result);
2233   Results.push_back(NewChain);
2234 }
2235
2236 SDValue VectorLegalizer::UnrollVSETCC(SDNode *Node) {
2237   EVT VT = Node->getValueType(0);
2238   unsigned NumElems = VT.getVectorNumElements();
2239   EVT EltVT = VT.getVectorElementType();
2240   SDValue LHS = Node->getOperand(0);
2241   SDValue RHS = Node->getOperand(1);
2242   SDValue CC = Node->getOperand(2);
2243   EVT TmpEltVT = LHS.getValueType().getVectorElementType();
2244   SDLoc dl(Node);
2245   SmallVector<SDValue, 8> Ops(NumElems);
2246   for (unsigned i = 0; i < NumElems; ++i) {
2247     SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS,
2248                                   DAG.getVectorIdxConstant(i, dl));
2249     SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS,
2250                                   DAG.getVectorIdxConstant(i, dl));
2251     // FIXME: We should use i1 setcc + boolext here, but it causes regressions.
2252     Ops[i] = DAG.getNode(ISD::SETCC, dl,
2253                          TLI.getSetCCResultType(DAG.getDataLayout(),
2254                                                 *DAG.getContext(), TmpEltVT),
2255                          LHSElem, RHSElem, CC);
2256     Ops[i] = DAG.getSelect(dl, EltVT, Ops[i],
2257                            DAG.getBoolConstant(true, dl, EltVT, VT),
2258                            DAG.getConstant(0, dl, EltVT));
2259   }
2260   return DAG.getBuildVector(VT, dl, Ops);
2261 }
2262
2263 bool SelectionDAG::LegalizeVectors() {
2264   return VectorLegalizer(*this).Run();
2265 }