//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the interfaces that VE uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#include "VEISelLowering.h"
#include "MCTargetDesc/VEMCExpr.h"
#include "VECustomDAG.h"
#include "VEInstrBuilder.h"
#include "VEMachineFunctionInfo.h"
#include "VERegisterInfo.h"
#include "VETargetMachine.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

#define DEBUG_TYPE "ve-lower"
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "VEGenCallingConv.inc"
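
// Helpers that pick the CCAssignFn (generated from VEGenCallingConv.inc) used
// for return values and for parameters of a given calling convention.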
CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  switch (CallConv) {
  default:
    return RetCC_VE_C;
  case CallingConv::Fast:
    return RetCC_VE_Fast;
  }
}

CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  if (IsVarArg)
    return CC_VE2;
  switch (CallConv) {
  default:
    return CC_VE_C;
  case CallingConv::Fast:
    return CC_VE_Fast;
  }
}

bool VETargetLowering::CanLowerReturn(
    CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  CCAssignFn *RetCC = getReturnCC(CallConv);
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(Outs, RetCC);
}
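
// Value type tables used below: the vector, mask, and packed vector types
// that become legal when the VPU subtarget feature is enabled.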
static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
                                   MVT::v256f32, MVT::v512f32, MVT::v256f64};

static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};

static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};

void VETargetLowering::initRegisterClasses() {
  // Set up the register classes.
  addRegisterClass(MVT::i32, &VE::I32RegClass);
  addRegisterClass(MVT::i64, &VE::I64RegClass);
  addRegisterClass(MVT::f32, &VE::F32RegClass);
  addRegisterClass(MVT::f64, &VE::I64RegClass);
  addRegisterClass(MVT::f128, &VE::F128RegClass);

  if (Subtarget->enableVPU()) {
    for (MVT VecVT : AllVectorVTs)
      addRegisterClass(VecVT, &VE::V64RegClass);
    addRegisterClass(MVT::v256i1, &VE::VMRegClass);
    addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  }
}

void VETargetLowering::initSPUActions() {
  const auto &TM = getTargetMachine();
  /// Load & Store {

  // VE doesn't have i1 sign extending load.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setTruncStoreAction(VT, MVT::i1, Expand);
  }

  // VE doesn't have floating point extload/truncstore, so expand them.
  for (MVT FPVT : MVT::fp_valuetypes()) {
    for (MVT OtherFPVT : MVT::fp_valuetypes()) {
      setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
      setTruncStoreAction(FPVT, OtherFPVT, Expand);
    }
  }

  // VE doesn't have fp128 load/store, so expand them in custom lower.
  setOperationAction(ISD::LOAD, MVT::f128, Custom);
  setOperationAction(ISD::STORE, MVT::f128, Custom);

  /// } Load & Store

  // Custom legalize address nodes into LO/HI parts.
  MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
  setOperationAction(ISD::BlockAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
  setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
  setOperationAction(ISD::ConstantPool, PtrVT, Custom);
  setOperationAction(ISD::JumpTable, PtrVT, Custom);

  /// VAARG handling {
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  // VAARG needs to be lowered to access memory with 8-byte alignment.
  setOperationAction(ISD::VAARG, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  /// } VAARG handling

  /// Stack {
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // Use the default implementation.
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  /// } Stack

  /// Branch {

  // VE doesn't have BRCOND.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // BR_JT is not implemented yet.
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  /// } Branch

  /// Int Ops {
  for (MVT IntVT : {MVT::i32, MVT::i64}) {
    // VE has no REM or DIVREM operations.
    setOperationAction(ISD::UREM, IntVT, Expand);
    setOperationAction(ISD::SREM, IntVT, Expand);
    setOperationAction(ISD::SDIVREM, IntVT, Expand);
    setOperationAction(ISD::UDIVREM, IntVT, Expand);

    // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
    setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
    setOperationAction(ISD::SRL_PARTS, IntVT, Expand);

    // VE has no MULHU/S or U/SMUL_LOHI operations.
    // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
    setOperationAction(ISD::MULHU, IntVT, Expand);
    setOperationAction(ISD::MULHS, IntVT, Expand);
    setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
    setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);

    // VE has no CTTZ, ROTL, ROTR operations.
    setOperationAction(ISD::CTTZ, IntVT, Expand);
    setOperationAction(ISD::ROTL, IntVT, Expand);
    setOperationAction(ISD::ROTR, IntVT, Expand);

    // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
    // same instruction works as an i32 BSWAP operation with an additional
    // parameter. Use isel patterns to lower BSWAP.
    setOperationAction(ISD::BSWAP, IntVT, Legal);

    // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
    // operations. Use isel patterns for i64, promote for i32.
    LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
    setOperationAction(ISD::BITREVERSE, IntVT, Act);
    setOperationAction(ISD::CTLZ, IntVT, Act);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
    setOperationAction(ISD::CTPOP, IntVT, Act);

    // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
    // Use isel patterns for i64, promote for i32.
    setOperationAction(ISD::AND, IntVT, Act);
    setOperationAction(ISD::OR, IntVT, Act);
    setOperationAction(ISD::XOR, IntVT, Act);

    // Legal smax and smin
    setOperationAction(ISD::SMAX, IntVT, Legal);
    setOperationAction(ISD::SMIN, IntVT, Legal);
  }
  /// } Int Ops

  /// Conversion {
  // VE doesn't have instructions for fp<->uint, so let llvm expand them.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);

  // fp16 not supported
  for (MVT FPVT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
    setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
  }
  /// } Conversion

  /// Floating-point Ops {
  /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
  ///       and fcmp.

  // VE doesn't have the following floating point operations.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FNEG, VT, Expand);
    setOperationAction(ISD::FREM, VT, Expand);
  }

  // VE doesn't have fdiv of f128.
  setOperationAction(ISD::FDIV, MVT::f128, Expand);

  for (MVT FPVT : {MVT::f32, MVT::f64}) {
    // f32 and f64 use ConstantFP.  f128 uses ConstantPool.
    setOperationAction(ISD::ConstantFP, FPVT, Legal);
  }
  /// } Floating-point Ops

  /// Floating-point math functions {

  // VE doesn't have the following floating point math functions.
  for (MVT VT : MVT::fp_valuetypes()) {
    setOperationAction(ISD::FABS, VT, Expand);
    setOperationAction(ISD::FCOPYSIGN, VT, Expand);
    setOperationAction(ISD::FCOS, VT, Expand);
    setOperationAction(ISD::FMA, VT, Expand);
    setOperationAction(ISD::FPOW, VT, Expand);
    setOperationAction(ISD::FSIN, VT, Expand);
    setOperationAction(ISD::FSQRT, VT, Expand);
  }

  // VE has single and double FMINNUM and FMAXNUM.
  for (MVT VT : {MVT::f32, MVT::f64}) {
    setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
  }

  /// } Floating-point math functions

  /// Atomic instructions {

  setMaxAtomicSizeInBitsSupported(64);
  setMinCmpXchgSizeInBits(32);
  setSupportsUnalignedAtomics(false);

  // Use custom inserter for ATOMIC_FENCE.
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);

  // Other atomic instructions.
  for (MVT VT : MVT::integer_valuetypes()) {
    // Support i8/i16 atomic swap.
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);

    // FIXME: Support "atmam" instructions.
    setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);

    // VE doesn't have the following instructions.
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
  }

  /// } Atomic instructions

  /// SJLJ instructions {
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
  if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  /// } SJLJ instructions

  // Intrinsic instructions
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
}
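
// Set up actions for vector (VPU) types: mask-register ops, translation of
// legal vector types to VVP_* nodes, packed-type element access, vector
// memory operations, and integer vector reductions.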
void VETargetLowering::initVPUActions() {
  for (MVT LegalMaskVT : AllMaskVTs)
    setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);

  for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
    setOperationAction(Opc, MVT::v512i1, Custom);

  for (MVT LegalVecVT : AllVectorVTs) {
    setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
    // Translate all vector instructions with legal element types to VVP_*
    // nodes.
    // TODO We will custom-widen into VVP_* nodes in the future. While we are
    // building the infrastructure for this, we only do this for legal vector
    // VTs.
#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
  setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
#define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
  setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
    setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
#include "VVPNodes.def"
  }

  for (MVT LegalPackedVT : AllPackedVTs) {
    setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
  }

  // vNt32, vNt64 ops (legal element types)
  for (MVT VT : MVT::vector_valuetypes()) {
    MVT ElemVT = VT.getVectorElementType();
    unsigned ElemBits = ElemVT.getScalarSizeInBits();
    if (ElemBits != 32 && ElemBits != 64)
      continue;

    for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
      setOperationAction(MemOpc, VT, Custom);

    const ISD::NodeType IntReductionOCs[] = {
        ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
        ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
        ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};

    for (unsigned IntRedOpc : IntReductionOCs)
      setOperationAction(IntRedOpc, VT, Custom);
  }

  // v256i1 and v512i1 ops
  for (MVT MaskVT : AllMaskVTs) {
    // Custom lower mask ops
    setOperationAction(ISD::STORE, MaskVT, Custom);
    setOperationAction(ISD::LOAD, MaskVT, Custom);
  }
}
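
// Lower outgoing return values: assign each value to its return register per
// the return calling convention, applying any extension or the f32-in-i64
// bitcast packing the ABI requires, and glue the copies to the RET_GLUE node.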
SDValue
VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                              bool IsVarArg,
                              const SmallVectorImpl<ISD::OutputArg> &Outs,
                              const SmallVectorImpl<SDValue> &OutVals,
                              const SDLoc &DL, SelectionDAG &DAG) const {
  // CCValAssign - represent the assignment of the return value to locations.
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    SDValue OutVal = OutVals[i];

    // Integer return values must be sign or zero extended by the callee.
    switch (VA.getLocInfo()) {
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::ZExt:
      OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::AExt:
      OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
      break;
    case CCValAssign::BCvt: {
      // Convert a float return value to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                          MVT::i64, Undef, OutVal, Sub_f32),
                       0);
      break;
    }
    default:
      llvm_unreachable("Unknown loc info!");
    }

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);

    // Guarantee that all emitted copies are stuck together with flags.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
}

SDValue VETargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();

  // Get the base offset of the incoming arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 64;

  // Analyze arguments according to CC_VE.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));

  for (const CCValAssign &VA : ArgLocs) {
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    if (VA.isRegLoc()) {
      // This argument is passed in a register.
      // All integer register arguments are promoted by the caller to i64.

      // Create a virtual register for the promoted live-in value.
      Register VReg =
          MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
      SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());

      // The caller promoted the argument, so insert an Assert?ext SDNode so we
      // won't promote the value again in this function.
      switch (VA.getLocInfo()) {
      case CCValAssign::SExt:
        Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::ZExt:
        Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
                          DAG.getValueType(VA.getValVT()));
        break;
      case CCValAssign::BCvt: {
        // Extract a float argument from i64 with padding.
        //     63     31   0
        //    +------+------+
        //    | float|   0  |
        //    +------+------+
        assert(VA.getLocVT() == MVT::i64);
        assert(VA.getValVT() == MVT::f32);
        SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
        Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                         MVT::f32, Arg, Sub_f32),
                      0);
        break;
      }
      default:
        break;
      }

      // Truncate the register down to the argument type.
      if (VA.isExtInLoc())
        Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);

      InVals.push_back(Arg);
      continue;
    }

    // The registers are exhausted. This argument was passed on the stack.
    assert(VA.isMemLoc());
    // The CC_VE_Full/Half functions compute stack offsets relative to the
    // beginning of the arguments area at %fp + the size of reserved area.
    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;

    // Adjust the offset for a float argument by adding 4, since the argument
    // is stored in an 8-byte slot with the layout below.  LLVM generates a
    // 4-byte load instruction, so the offset needs adjusting here.  This
    // adjustment is required only in LowerFormalArguments.  In LowerCall,
    // a float argument is converted to i64 first, and stored as 8 bytes of
    // data, which is required by the ABI, so no adjustment is needed.
    //    0      4
    //    +------+------+
    //    | empty| float|
    //    +------+------+
    if (VA.getValVT() == MVT::f32)
      Offset += 4;

    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
    InVals.push_back(
        DAG.getLoad(VA.getValVT(), DL, Chain,
                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
                    MachinePointerInfo::getFixedStack(MF, FI)));
  }

  if (!IsVarArg)
    return Chain;

  // This function takes variable arguments, some of which may have been passed
  // in registers %s0-%s8.
  //
  // The va_start intrinsic needs to know the offset to the first variable
  // argument.
  // TODO: need to calculate offset correctly once we support f128.
  unsigned ArgOffset = ArgLocs.size() * 8;
  VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
  // Skip the reserved area at the top of stack.
  FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);

  return Chain;
}

// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                             const MachineFunction &MF) const {
  Register Reg = StringSwitch<Register>(RegName)
                     .Case("sp", VE::SX11)    // Stack pointer
                     .Case("fp", VE::SX9)     // Frame pointer
                     .Case("sl", VE::SX8)     // Stack limit
                     .Case("lr", VE::SX10)    // Link register
                     .Case("tp", VE::SX14)    // Thread pointer
                     .Case("outer", VE::SX12) // Outer register
                     .Case("info", VE::SX17)  // Info area register
                     .Case("got", VE::SX15)   // Global offset table register
                     .Case("plt", VE::SX16)   // Procedure linkage table register
                     .Default(0);

  if (Reg)
    return Reg;

  report_fatal_error("Invalid register name global variable");
}

//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
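
// Lower a call: materialize the callee address into SX12, copy register and
// stack arguments inside a CALLSEQ_START/CALLSEQ_END bracket, emit the
// VEISD::CALL node, and then copy the results back out of their physregs.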
SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                    SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc DL = CLI.DL;
  SDValue Chain = CLI.Chain;
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // VE target does not yet support tail call optimization.
  CLI.IsTailCall = false;

  // Get the base offset of the outgoing arguments stack space.
  unsigned ArgsBaseOffset = Subtarget->getRsaSize();
  // Get the size of the preserved arguments area.
  unsigned ArgsPreserved = 8 * 8u;

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  // Allocate the preserved area first.
  CCInfo.AllocateStack(ArgsPreserved, Align(8));
  // We already allocated the preserved area, so the stack offset computed
  // by CC_VE would be correct now.
  CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));

  // VE requires arguments to be passed in both registers and on the stack for
  // varargs or unprototyped functions.
  bool UseBoth = CLI.IsVarArg;

  // Analyze operands again if it is required to store BOTH.
  SmallVector<CCValAssign, 16> ArgLocs2;
  CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
                  ArgLocs2, *DAG.getContext());
  if (UseBoth)
    CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));

  // Get the size of the outgoing arguments stack space requirement.
  unsigned ArgsSize = CCInfo.getStackSize();

  // Keep stack frames 16-byte aligned.
  ArgsSize = alignTo(ArgsSize, 16);

  // Adjust the stack pointer to make room for the arguments.
  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
  // with more than 6 arguments.
  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);

  // Collect the set of registers to pass to the function and their values.
  // This will be emitted as a sequence of CopyToReg nodes glued to the call
  // instruction.
  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;

  // Collect chains from all the memory operations that copy arguments to the
  // stack. They must follow the stack pointer adjustment above and precede the
  // call instruction itself.
  SmallVector<SDValue, 8> MemOpChains;

  // VE needs the address of the callee function in a register,
  // so prepare to copy it to SX12 here.

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // Likewise ExternalSymbol -> TargetExternalSymbol.
  SDValue Callee = CLI.Callee;

  bool IsPICCall = isPositionIndependent();

  // PC-relative references to external symbols should go through $stub.
  // If so, we need to prepare GlobalBaseReg first.
  const TargetMachine &TM = DAG.getTarget();
  const GlobalValue *GV = nullptr;
  auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
  if (CalleeG)
    GV = CalleeG->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(GV);
  bool UsePlt = !Local;
  MachineFunction &MF = DAG.getMachineFunction();

  // Turn the GlobalAddress/ExternalSymbol node into a value node
  // containing its address here.
  if (CalleeG) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (IsPICCall) {
      if (UsePlt)
        Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
    } else {
      Callee =
          makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
    }
  }

  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = CLI.OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      llvm_unreachable("Unknown location info!");
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt: {
      // Convert a float argument to i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Undef = SDValue(
          DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
                                       MVT::i64, Undef, Arg, Sub_f32),
                    0);
      break;
    }
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      if (!UseBoth)
        continue;
      VA = ArgLocs2[i];
    }

    assert(VA.isMemLoc());

    // Create a store off the stack pointer for this argument.
    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
    // The argument area starts at %fp/%sp + the size of reserved area.
    SDValue PtrOff =
        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
    MemOpChains.push_back(
        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
  }

  // Emit all stores, make sure they occur before the call.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);

  // Build a sequence of CopyToReg nodes glued together with token chain and
  // glue operands which copy the outgoing args into registers. The InGlue is
  // necessary since all emitted instructions must be stuck together in order
  // to pass the live physical registers.
  SDValue InGlue;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
                             RegsToPass[i].second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // Build the operands for the call instruction itself.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Add a register mask operand representing the call-preserved registers.
  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // Make sure the CopyToReg nodes are glued to the call instruction which
  // consumes the registers.
  if (InGlue.getNode())
    Ops.push_back(InGlue);

  // Now the call itself.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  // Revert the stack pointer immediately after the call.
  Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
  InGlue = Chain.getValue(1);

  // Now extract the return values. This is more or less the same as
  // LowerFormalArguments.

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());

  // Set inreg flag manually for codegen generated library calls that
  // return float.
  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
    CLI.Ins[0].Flags.setInReg();

  RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(!VA.needsCustom() && "Unexpected custom lowering");
    Register Reg = VA.getLocReg();

    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
    // reside in the same register in the high and low bits. Reuse the
    // CopyFromReg previous node to avoid duplicate copies.
    SDValue RV;
    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
        RV = Chain.getValue(0);

    // But usually we'll create a new CopyFromReg for a different register.
    if (!RV.getNode()) {
      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
      Chain = RV.getValue(1);
      InGlue = Chain.getValue(2);
    }

    // The callee promoted the return value, so insert an Assert?ext SDNode so
    // we won't promote the value again in this function.
    switch (VA.getLocInfo()) {
    case CCValAssign::SExt:
      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::ZExt:
      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
                       DAG.getValueType(VA.getValVT()));
      break;
    case CCValAssign::BCvt: {
      // Extract a float return value from i64 with padding.
      //     63     31   0
      //    +------+------+
      //    | float|   0  |
      //    +------+------+
      assert(VA.getLocVT() == MVT::i64);
      assert(VA.getValVT() == MVT::f32);
      SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
      RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
                                      MVT::f32, RV, Sub_f32),
                   0);
      break;
    }
    default:
      break;
    }

    // Truncate the register down to the return value type.
    if (VA.isExtInLoc())
      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);

    InVals.push_back(RV);
  }

  return Chain;
}

bool VETargetLowering::isOffsetFoldingLegal(
    const GlobalAddressSDNode *GA) const {
  // VE uses 64-bit addressing, so we need multiple instructions to generate
  // an address.  Folding an address with an offset increases the number of
  // instructions, so we disable it here.  Offsets will be folded in the DAG
  // combine later if it is worth doing so.
  return false;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                    bool ForCodeSize) const {
  return VT == MVT::f32 || VT == MVT::f64;
}

/// Determine if the target supports unaligned memory accesses.
///
/// This function returns true if the target allows unaligned memory accesses
/// of the specified type in the given address space. If true, it also returns
/// whether the unaligned memory access is "fast" in the last argument by
/// reference. This is used, for example, in situations where an array
/// copy/move/set is converted to a sequence of store operations. Its use
/// helps to ensure that such replacements don't generate code that causes an
/// alignment error (trap) on the target machine.
bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                      unsigned AddrSpace,
                                                      Align A,
                                                      MachineMemOperand::Flags,
                                                      unsigned *Fast) const {
  if (Fast) {
    // It's fast anytime on VE
    *Fast = 1;
  }
  return true;
}

VETargetLowering::VETargetLowering(const TargetMachine &TM,
                                   const VESubtarget &STI)
    : TargetLowering(TM), Subtarget(&STI) {
  // Instructions which use registers as conditionals examine all the
  // bits (as does the pseudo SELECT_CC expansion). I don't think it
  // matters much whether it's ZeroOrOneBooleanContent, or
  // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
  // former.
  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent);

  initRegisterClasses();
  initSPUActions();
  initVPUActions();

  setStackPointerRegisterToSaveRestore(VE::SX11);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::SELECT);
  setTargetDAGCombine(ISD::SELECT_CC);

  // Set function alignment to 16 bytes.
  setMinFunctionAlignment(Align(16));

  // VE stores all arguments with 8-byte alignment.
  setMinStackArgumentAlignment(Align(8));

  computeRegisterProperties(Subtarget->getRegisterInfo());
}

const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
#define TARGET_NODE_CASE(NAME)                                                 \
  case VEISD::NAME:                                                            \
    return "VEISD::" #NAME;
  switch ((VEISD::NodeType)Opcode) {
  case VEISD::FIRST_NUMBER:
    break;
    TARGET_NODE_CASE(CMPI)
    TARGET_NODE_CASE(CMPU)
    TARGET_NODE_CASE(CMPF)
    TARGET_NODE_CASE(CMPQ)
    TARGET_NODE_CASE(CMOV)
    TARGET_NODE_CASE(CALL)
    TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETJMP)
    TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
    TARGET_NODE_CASE(GETFUNPLT)
    TARGET_NODE_CASE(GETSTACKTOP)
    TARGET_NODE_CASE(GETTLSADDR)
    TARGET_NODE_CASE(GLOBAL_BASE_REG)
    TARGET_NODE_CASE(Hi)
    TARGET_NODE_CASE(Lo)
    TARGET_NODE_CASE(RET_GLUE)
    TARGET_NODE_CASE(TS1AM)
    TARGET_NODE_CASE(VEC_UNPACK_LO)
    TARGET_NODE_CASE(VEC_UNPACK_HI)
    TARGET_NODE_CASE(VEC_PACK)
    TARGET_NODE_CASE(VEC_BROADCAST)
    TARGET_NODE_CASE(REPL_I32)
    TARGET_NODE_CASE(REPL_F32)

    TARGET_NODE_CASE(LEGALAVL)

    // Register the VVP_* SDNodes.
#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
#include "VVPNodes.def"
  }
#undef TARGET_NODE_CASE
  return nullptr;
}
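
// SETCC results are produced as i32 on VE.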
EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                         EVT VT) const {
  return MVT::i32;
}

// Convert to a target node and set target flags.
SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
                                          SelectionDAG &DAG) const {
  if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
    return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
                                      GA->getValueType(0), GA->getOffset(), TF);

  if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
    return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
                                     0, TF);

  if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
    return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
                                     CP->getAlign(), CP->getOffset(), TF);

  if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
    return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
                                       TF);

  if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
    return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);

  llvm_unreachable("Unhandled address SDNode");
}

// Split Op into high and low parts according to HiTF and LoTF.
// Return an ADD node combining the parts.
SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
                                       SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
  SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
  return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
}

// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
// or ExternalSymbol SDNode.
SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT PtrVT = Op.getValueType();

  // Handle PIC mode first. VE needs a got load for every variable!
  if (isPositionIndependent()) {
    auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);

    if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
        (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
      // Create the following instructions for local-linkage PIC code.
      //   lea %reg, label@gotoff_lo
      //   and %reg, %reg, (32)0
      //   lea.sl %reg, label@gotoff_hi(%reg, %got)
      SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
                                  VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
      SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
      return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    }
    // Create the following instructions for non-local-linkage PIC code.
    //   lea %reg, label@got_lo
    //   and %reg, %reg, (32)0
    //   lea.sl %reg, label@got_hi(%reg)
    //   ld %reg, (%reg, %got)
    SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
                                VEMCExpr::VK_VE_GOT_LO32, DAG);
    SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
    SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
    return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
                       MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // This is one of the absolute code models.
  switch (getTargetMachine().getCodeModel()) {
  default:
    llvm_unreachable("Unsupported absolute code model");
  case CodeModel::Small:
  case CodeModel::Medium:
  case CodeModel::Large:
    // abs64.
    return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
  }
}

/// Custom Lower {

// The mappings for emitLeadingFence/emitTrailingFence for VE are designed
// following http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
                                                Instruction *Inst,
                                                AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/non-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return nullptr; // Nothing to do
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Release);
  case AtomicOrdering::SequentiallyConsistent:
    if (!Inst->hasAtomicStore())
      return nullptr; // Nothing to do
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitLeadingFence");
}

Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  switch (Ord) {
  case AtomicOrdering::NotAtomic:
  case AtomicOrdering::Unordered:
    llvm_unreachable("Invalid fence: unordered/not-atomic");
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return nullptr; // Nothing to do
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return Builder.CreateFence(AtomicOrdering::Acquire);
  case AtomicOrdering::SequentiallyConsistent:
    return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  llvm_unreachable("Unknown fence ordering in emitTrailingFence");
}

SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicOrdering FenceOrdering =
      static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
  SyncScope::ID FenceSSID =
      static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));

  // VE uses Release consistency, so we need a fence instruction if it is a
  // cross-thread fence.
  if (FenceSSID == SyncScope::System) {
    switch (FenceOrdering) {
    case AtomicOrdering::NotAtomic:
    case AtomicOrdering::Unordered:
    case AtomicOrdering::Monotonic:
      // No need to generate a fencem instruction here.
      break;
    case AtomicOrdering::Acquire:
      // Generate "fencem 2" as acquire fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(2, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::Release:
      // Generate "fencem 1" as release fence.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(1, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      // Generate "fencem 3" as acq_rel and seq_cst fence.
      // FIXME: "fencem 3" doesn't wait for accesses by PCIe devices,
      // so seq_cst may require more instructions for them.
      return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
                                        DAG.getTargetConstant(3, DL, MVT::i32),
                                        Op.getOperand(0)),
                     0);
    }
  }

  // MEMBARRIER is a compiler barrier; it codegens to a no-op.
  return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
}

TargetLowering::AtomicExpansionKind
VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // We have a TS1AM implementation for i8/i16/i32/i64, so use it.
  if (AI->getOperation() == AtomicRMWInst::Xchg) {
    return AtomicExpansionKind::None;
  }
  // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.

  // Otherwise, expand it using compare and exchange instruction to not call
  // __sync_fetch_and_* functions.
  return AtomicExpansionKind::CmpXChg;
}
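
// Build the operands for a TS1AM-based sub-word atomic swap: derive the byte
// lane flag and the bit shift amount from the low two bits of the pointer,
// then shift the new value into position (see the formulas in the comments
// below).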
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
                            SDValue &Bits) {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);
  SDValue Ptr = N->getOperand(1);
  SDValue Val = N->getOperand(2);
  EVT PtrVT = Ptr.getValueType();
  bool Byte = N->getMemoryVT() == MVT::i8;
  //   Remainder = AND Ptr, 3
  //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
  //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
  //   Bits = Remainder << 3
  //   NewVal = Val << Bits
  SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
  SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
  SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
                      : DAG.getConstant(3, DL, MVT::i32);
  Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
  Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
  return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
}

static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
                             SDValue Bits) {
  SDLoc DL(Op);
  EVT VT = Data.getValueType();
  bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
  //   NewData = Data >> Bits
  //   Result = NewData & 0xff   ; If Byte is true (1 byte)
  //   Result = NewData & 0xffff ; If Byte is false (2 bytes)

  SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
  return DAG.getNode(ISD::AND, DL, VT,
                     {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
}

SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc DL(Op);
  AtomicSDNode *N = cast<AtomicSDNode>(Op);

  if (N->getMemoryVT() == MVT::i8) {
    // For i8, use "ts1am".
    //   Input:
    //     ATOMIC_SWAP Ptr, Val, Order
    //
    //   Output:
    //     Remainder = AND Ptr, 3
    //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
    //     Bits = Remainder << 3
    //     NewVal = Val << Bits
    //
    //     Aligned = AND Ptr, -4
    //     Data = TS1AM Aligned, Flag, NewVal
    //
    //     NewData = Data >> Bits
    //     Result = NewData & 0xff ; 1 byte result
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  if (N->getMemoryVT() == MVT::i16) {
    // For i16, use "ts1am".
    SDValue Flag;
    SDValue Bits;
    SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);

    SDValue Ptr = N->getOperand(1);
    SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
                                  {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
    SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
                                  DAG.getVTList(Op.getNode()->getValueType(0),
                                                Op.getNode()->getValueType(1)),
                                  {N->getChain(), Aligned, Flag, NewVal},
                                  N->getMemOperand());

    SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
    SDValue Chain = TS1AM.getValue(1);
    return DAG.getMergeValues({Result, Chain}, DL);
  }
  // Otherwise, let llvm legalize it.
  return Op;
}

SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue VETargetLowering::lowerConstantPool(SDValue Op,
                                            SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

SDValue
VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);

  // Generate the following code:
  //   t1: ch,glue = callseq_start t0, 0, 0
  //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
  //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
  //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
  SDValue Label = withTargetFlags(Op, 0, DAG);
  EVT PtrVT = Op.getValueType();

  // Lowering the machine isd will make sure everything is in the right
  // location.
  SDValue Chain = DAG.getEntryNode();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
      DAG.getMachineFunction(), CallingConv::C);
  Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
  SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
  Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
  Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
  Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));

  // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  MFI.setHasCalls(true);

  // Also generate code to prepare a GOT register if it is PIC.
  if (isPositionIndependent()) {
    MachineFunction &MF = DAG.getMachineFunction();
    Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
  }

  return Chain;
}

SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  // The current implementation of nld (2.26) doesn't allow local exec model
  // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
  // generate the general dynamic model code sequence.
  //
  // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
  return lowerToTLSGeneralDynamicModel(Op, DAG);
}

SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  return makeAddress(Op, DAG);
}

// Lower a f128 load into two f64 loads.
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  SDValue Lo64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);
  EVT AddrVT = LdNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  SDValue Hi64 =
      DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
                  LdNode->getPointerInfo(), Alignment,
                  LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                       : MachineMemOperand::MONone);

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDNode *InFP128 =
      DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Hi64, SubRegEven);
  InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
                               SDValue(InFP128, 0), Lo64, SubRegOdd);
  SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
                          SDValue(Hi64.getNode(), 1)};
  SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
  return DAG.getMergeValues(Ops, DL);
}

// Lower a vXi1 load into the following instructions:
//   LDrii %1, (,%addr)
//   LVMxir %vm, 0, %1
//   LDrii %2, 8(,%addr)
//   LVMxir %vm, 0, %2
//   ...
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
  assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = LdNode->getBasePtr();
  Align Alignment = LdNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = LdNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 4; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
    for (int i = 0; i < 8; ++i) {
      // Generate load dag and prepare chains.
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      SDValue Val =
          DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
                      LdNode->getPointerInfo(), Alignment,
                      LdNode->isVolatile() ? MachineMemOperand::MOVolatile
                                           : MachineMemOperand::MONone);
      OutChains[i] = SDValue(Val.getNode(), 1);

      VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
                              DAG.getTargetConstant(i, DL, MVT::i64), Val,
                              SDValue(VM, 0));
    }
    SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
    SDValue Ops[2] = {SDValue(VM, 0), OutChain};
    return DAG.getMergeValues(Ops, DL);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}

SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
  EVT MemVT = LdNode->getMemoryVT();

  // If VPU is enabled, always expand non-mask vector loads to VVP.
  if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
    return lowerToVVP(Op, DAG);

  SDValue BasePtr = LdNode->getBasePtr();
  if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
    // Do not expand a load instruction with a frame index here because of
    // dependency problems.  We expand it later in eliminateFrameIndex().
    return Op;
  }

  if (MemVT == MVT::f128)
    return lowerLoadF128(Op, DAG);
  if (isMaskType(MemVT))
    return lowerLoadI1(Op, DAG);

  return Op;
}

// Lower a f128 store into two f64 stores.
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
  SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);

  SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegEven);
  SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
                                    StNode->getValue(), SubRegOdd);

  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);

  // VE stores Hi64 to 8(addr) and Lo64 to 0(addr).
  SDValue OutChains[2];
  OutChains[0] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
                   StNode->getBasePtr(), MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  EVT AddrVT = StNode->getBasePtr().getValueType();
  SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
                              DAG.getConstant(8, DL, AddrVT));
  OutChains[1] =
      DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
                   MachinePointerInfo(), Alignment,
                   StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                        : MachineMemOperand::MONone);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
}

// Lower a vXi1 store into the following instructions:
//   SVMi  %1, %vm, 0
//   STrii %1, (,%addr)
//   SVMi  %2, %vm, 1
//   STrii %2, 8(,%addr)
//   ...
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
  assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");

  SDValue BasePtr = StNode->getBasePtr();
  Align Alignment = StNode->getAlign();
  if (Alignment > 8)
    Alignment = Align(8);
  EVT AddrVT = BasePtr.getValueType();
  EVT MemVT = StNode->getMemoryVT();
  if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
    SDValue OutChains[4];
    for (int i = 0; i < 4; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
    SDValue OutChains[8];
    for (int i = 0; i < 8; ++i) {
      SDNode *V =
          DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
                             DAG.getTargetConstant(i, DL, MVT::i64));
      SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
                                 DAG.getConstant(8 * i, DL, AddrVT));
      OutChains[i] =
          DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
                       MachinePointerInfo(), Alignment,
                       StNode->isVolatile() ? MachineMemOperand::MOVolatile
                                            : MachineMemOperand::MONone);
    }
    return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
  } else {
    // Otherwise, ask llvm to expand it.
    return SDValue();
  }
}
1538 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1539 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1540 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1541 EVT MemVT = StNode->getMemoryVT();
1543 // If VPU is enabled, always expand non-mask vector stores to VVP
1544 if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1545 return lowerToVVP(Op, DAG);
1547 SDValue BasePtr = StNode->getBasePtr();
1548 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1549 // Do not expand store instruction with frame index here because of
1550 // dependency problems. We expand it later in eliminateFrameIndex().
1551 return Op;
1554 if (MemVT == MVT::f128)
1555 return lowerStoreF128(Op, DAG);
1556 if (isMaskType(MemVT))
1557 return lowerStoreI1(Op, DAG);
1559 // Otherwise, ask llvm to expand it.
1560 return SDValue();
1563 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1564 MachineFunction &MF = DAG.getMachineFunction();
1565 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1566 auto PtrVT = getPointerTy(DAG.getDataLayout());
1568 // Need frame address to find the address of VarArgsFrameIndex.
1569 MF.getFrameInfo().setFrameAddressIsTaken(true);
1571 // vastart just stores the address of the VarArgsFrameIndex slot into the
1572 // memory location argument.
1573 SDLoc DL(Op);
1574 SDValue Offset =
1575 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1576 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1577 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1578 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1579 MachinePointerInfo(SV));
1582 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1583 SDNode *Node = Op.getNode();
1584 EVT VT = Node->getValueType(0);
1585 SDValue InChain = Node->getOperand(0);
1586 SDValue VAListPtr = Node->getOperand(1);
1587 EVT PtrVT = VAListPtr.getValueType();
1588 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1589 SDLoc DL(Node);
1590 SDValue VAList =
1591 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1592 SDValue Chain = VAList.getValue(1);
1593 SDValue NextPtr;
1595 if (VT == MVT::f128) {
1596 // VE f128 values must be stored with 16-byte alignment. We don't
1597 // know the actual alignment of VAList, so we align it
1598 // dynamically.
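// Illustrative sketch of the rounding below: with the 16-byte alignment used
// here it is simply VAList = (VAList + 15) & -16.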
1599 int Align = 16;
1600 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1601 DAG.getConstant(Align - 1, DL, PtrVT));
1602 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1603 DAG.getConstant(-Align, DL, PtrVT));
1604 // Increment the pointer, VAList, by 16 to the next vaarg.
1605 NextPtr =
1606 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1607 } else if (VT == MVT::f32) {
1608 // float --> need special handling like below.
1609 // 0 4
1610 // +------+------+
1611 // | empty| float|
1612 // +------+------+
1613 // Increment the pointer, VAList, by 8 to the next vaarg.
1614 NextPtr =
1615 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1616 // Then, adjust VAList.
1617 unsigned InternalOffset = 4;
1618 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1619 DAG.getConstant(InternalOffset, DL, PtrVT));
1620 } else {
1621 // Increment the pointer, VAList, by 8 to the next vaarg.
1622 NextPtr =
1623 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1626 // Store the incremented VAList to the legalized pointer.
1627 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1629 // Load the actual argument out of the pointer VAList.
1630 // We can't count on greater alignment than the word size.
1631 return DAG.getLoad(
1632 VT, DL, InChain, VAList, MachinePointerInfo(),
1633 Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1636 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1637 SelectionDAG &DAG) const {
1638 // Generate the following code.
1639 // (void)__ve_grow_stack(size);
1640 // ret = GETSTACKTOP; // pseudo instruction
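// For an over-aligned request, an illustrative sketch of the generated code
// (assuming Align is a power of two) is:
//   (void)__ve_grow_stack_align(size, ~(Align - 1));
//   ret = (GETSTACKTOP + Align - 1) & ~(Align - 1);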
1641 SDLoc DL(Op);
1643 // Get the inputs.
1644 SDNode *Node = Op.getNode();
1645 SDValue Chain = Op.getOperand(0);
1646 SDValue Size = Op.getOperand(1);
1647 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1648 EVT VT = Node->getValueType(0);
1650 // Chain the dynamic stack allocation so that it doesn't modify the stack
1651 // pointer when other instructions are using the stack.
1652 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1654 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1655 Align StackAlign = TFI.getStackAlign();
1656 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1658 // Prepare arguments
1659 TargetLowering::ArgListTy Args;
1660 TargetLowering::ArgListEntry Entry;
1661 Entry.Node = Size;
1662 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1663 Args.push_back(Entry);
1664 if (NeedsAlign) {
1665 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1666 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1667 Args.push_back(Entry);
1669 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1671 EVT PtrVT = Op.getValueType();
1672 SDValue Callee;
1673 if (NeedsAlign) {
1674 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1675 } else {
1676 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1679 TargetLowering::CallLoweringInfo CLI(DAG);
1680 CLI.setDebugLoc(DL)
1681 .setChain(Chain)
1682 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1683 .setDiscardResult(true);
1684 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1685 Chain = pair.second;
1686 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1687 if (NeedsAlign) {
1688 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1689 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1690 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1691 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1693 // Chain = Result.getValue(1);
1694 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1696 SDValue Ops[2] = {Result, Chain};
1697 return DAG.getMergeValues(Ops, DL);
1700 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1701 SelectionDAG &DAG) const {
1702 SDLoc DL(Op);
1703 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1704 Op.getOperand(1));
1707 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1708 SelectionDAG &DAG) const {
1709 SDLoc DL(Op);
1710 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1711 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1712 Op.getOperand(1));
1715 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1716 SelectionDAG &DAG) const {
1717 SDLoc DL(Op);
1718 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1719 Op.getOperand(0));
1722 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1723 const VETargetLowering &TLI,
1724 const VESubtarget *Subtarget) {
1725 SDLoc DL(Op);
1726 MachineFunction &MF = DAG.getMachineFunction();
1727 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1729 MachineFrameInfo &MFI = MF.getFrameInfo();
1730 MFI.setFrameAddressIsTaken(true);
1732 unsigned Depth = Op.getConstantOperandVal(0);
1733 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1734 Register FrameReg = RegInfo->getFrameRegister(MF);
1735 SDValue FrameAddr =
1736 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1737 while (Depth--)
1738 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1739 FrameAddr, MachinePointerInfo());
1740 return FrameAddr;
1743 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1744 const VETargetLowering &TLI,
1745 const VESubtarget *Subtarget) {
1746 MachineFunction &MF = DAG.getMachineFunction();
1747 MachineFrameInfo &MFI = MF.getFrameInfo();
1748 MFI.setReturnAddressIsTaken(true);
1750 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1751 return SDValue();
1753 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1755 SDLoc DL(Op);
1756 EVT VT = Op.getValueType();
1757 SDValue Offset = DAG.getConstant(8, DL, VT);
1758 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1759 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1760 MachinePointerInfo());
1763 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1764 SelectionDAG &DAG) const {
1765 SDLoc DL(Op);
1766 unsigned IntNo = Op.getConstantOperandVal(0);
1767 switch (IntNo) {
1768 default: // Don't custom lower most intrinsics.
1769 return SDValue();
1770 case Intrinsic::eh_sjlj_lsda: {
1771 MachineFunction &MF = DAG.getMachineFunction();
1772 MVT VT = Op.getSimpleValueType();
1773 const VETargetMachine *TM =
1774 static_cast<const VETargetMachine *>(&DAG.getTarget());
1776 // Create GCC_except_tableXX string. The real symbol for that will be
1777 // generated in EHStreamer::emitExceptionTable() later. So, we just
1778 // borrow its name here.
1779 TM->getStrList()->push_back(std::string(
1780 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1781 SDValue Addr =
1782 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1783 if (isPositionIndependent()) {
1784 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1785 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1786 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1787 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1789 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1794 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1795 if (!isa<BuildVectorSDNode>(N))
1796 return false;
1797 const auto *BVN = cast<BuildVectorSDNode>(N);
1799 // Find first non-undef insertion.
1800 unsigned Idx;
1801 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1802 auto ElemV = BVN->getOperand(Idx);
1803 if (!ElemV->isUndef())
1804 break;
1806 // Catch the (hypothetical) all-undef case.
1807 if (Idx == BVN->getNumOperands())
1808 return false;
1809 // Remember insertion.
1810 UniqueIdx = Idx++;
1811 // Verify that all other insertions are undef.
1812 for (; Idx < BVN->getNumOperands(); ++Idx) {
1813 auto ElemV = BVN->getOperand(Idx);
1814 if (!ElemV->isUndef())
1815 return false;
1817 return true;
1820 static SDValue getSplatValue(SDNode *N) {
1821 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1822 return BuildVec->getSplatValue();
1824 return SDValue();
1827 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1828 SelectionDAG &DAG) const {
1829 VECustomDAG CDAG(DAG, Op);
1830 MVT ResultVT = Op.getSimpleValueType();
1832 // If there is just one element, expand to INSERT_VECTOR_ELT.
1833 unsigned UniqueIdx;
1834 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1835 SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1836 auto ElemV = Op->getOperand(UniqueIdx);
1837 SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1838 return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1841 // Else emit a broadcast.
1842 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1843 unsigned NumEls = ResultVT.getVectorNumElements();
1844 auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1845 return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1848 // Expand
1849 return SDValue();
1852 TargetLowering::LegalizeAction
1853 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1854 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1855 // these operations (transform nodes such that their AVL parameter refers to
1856 // packs of 64 bits, instead of the number of elements).
1858 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1859 // re-visit them.
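// Illustrative example (assuming packed-mode semantics): a VVP operation on
// v512i32 whose AVL counts 512 elements is rewritten so that its AVL counts
// 256 64-bit packs instead, wrapped in a LEGALAVL node.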
1860 if (isPackingSupportOpcode(Op.getOpcode()))
1861 return Legal;
1863 // Custom lower to legalize AVL for packed mode.
1864 if (isVVPOrVEC(Op.getOpcode()))
1865 return Custom;
1866 return Legal;
1869 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1870 LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
1871 unsigned Opcode = Op.getOpcode();
1873 /// Scalar isel.
1874 switch (Opcode) {
1875 case ISD::ATOMIC_FENCE:
1876 return lowerATOMIC_FENCE(Op, DAG);
1877 case ISD::ATOMIC_SWAP:
1878 return lowerATOMIC_SWAP(Op, DAG);
1879 case ISD::BlockAddress:
1880 return lowerBlockAddress(Op, DAG);
1881 case ISD::ConstantPool:
1882 return lowerConstantPool(Op, DAG);
1883 case ISD::DYNAMIC_STACKALLOC:
1884 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1885 case ISD::EH_SJLJ_LONGJMP:
1886 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1887 case ISD::EH_SJLJ_SETJMP:
1888 return lowerEH_SJLJ_SETJMP(Op, DAG);
1889 case ISD::EH_SJLJ_SETUP_DISPATCH:
1890 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1891 case ISD::FRAMEADDR:
1892 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1893 case ISD::GlobalAddress:
1894 return lowerGlobalAddress(Op, DAG);
1895 case ISD::GlobalTLSAddress:
1896 return lowerGlobalTLSAddress(Op, DAG);
1897 case ISD::INTRINSIC_WO_CHAIN:
1898 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1899 case ISD::JumpTable:
1900 return lowerJumpTable(Op, DAG);
1901 case ISD::LOAD:
1902 return lowerLOAD(Op, DAG);
1903 case ISD::RETURNADDR:
1904 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1905 case ISD::BUILD_VECTOR:
1906 return lowerBUILD_VECTOR(Op, DAG);
1907 case ISD::STORE:
1908 return lowerSTORE(Op, DAG);
1909 case ISD::VASTART:
1910 return lowerVASTART(Op, DAG);
1911 case ISD::VAARG:
1912 return lowerVAARG(Op, DAG);
1914 case ISD::INSERT_VECTOR_ELT:
1915 return lowerINSERT_VECTOR_ELT(Op, DAG);
1916 case ISD::EXTRACT_VECTOR_ELT:
1917 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1920 /// Vector isel.
1921 if (ISD::isVPOpcode(Opcode))
1922 return lowerToVVP(Op, DAG);
1924 switch (Opcode) {
1925 default:
1926 llvm_unreachable("Should not custom lower this!");
1928 // Legalize the AVL of this internal node.
1929 case VEISD::VEC_BROADCAST:
1930 #define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1931 #include "VVPNodes.def"
1932 // AVL already legalized.
1933 if (getAnnotatedNodeAVL(Op).second)
1934 return Op;
1935 return legalizeInternalVectorOp(Op, DAG);
1937 // Translate into a VEC_*/VVP_* layer operation.
1938 case ISD::MLOAD:
1939 case ISD::MSTORE:
1940 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1941 #include "VVPNodes.def"
1942 if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1943 return splitMaskArithmetic(Op, DAG);
1944 return lowerToVVP(Op, DAG);
1947 /// } Custom Lower
1949 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1950 SmallVectorImpl<SDValue> &Results,
1951 SelectionDAG &DAG) const {
1952 switch (N->getOpcode()) {
1953 case ISD::ATOMIC_SWAP:
1954 // Let LLVM expand atomic swap instruction through LowerOperation.
1955 return;
1956 default:
1957 LLVM_DEBUG(N->dumpr(&DAG));
1958 llvm_unreachable("Do not know how to custom type legalize this operation!");
1962 /// JumpTable for VE.
1964 /// VE cannot generate relocatable symbols in a jump table. VE cannot
1965 /// generate expressions using symbols in both the text segment and the data
1966 /// segment like below.
1967 /// .4byte .LBB0_2-.LJTI0_0
1968 /// So, we generate an offset from the top of the function like below as
1969 /// a custom label.
1970 /// .4byte .LBB0_2-<function name>
1972 unsigned VETargetLowering::getJumpTableEncoding() const {
1973 // Use custom label for PIC.
1974 if (isPositionIndependent())
1975 return MachineJumpTableInfo::EK_Custom32;
1977 // Otherwise, use the normal jump table encoding heuristics.
1978 return TargetLowering::getJumpTableEncoding();
1981 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1982 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1983 unsigned Uid, MCContext &Ctx) const {
1984 assert(isPositionIndependent());
1986 // Generate custom label for PIC like below.
1987 // .4bytes .LBB0_2-<function name>
1988 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1989 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1990 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1991 return MCBinaryExpr::createSub(Value, Base, Ctx);
1994 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1995 SelectionDAG &DAG) const {
1996 assert(isPositionIndependent());
1997 SDLoc DL(Table);
1998 Function *Function = &DAG.getMachineFunction().getFunction();
1999 assert(Function != nullptr);
2000 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2002 // In the jump table, we have the following values in PIC mode.
2003 // .4bytes .LBB0_2-<function name>
2004 // We need to add this value and the address of this function to generate
2005 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
2006 // following instructions:
2007 // lea %reg, fun@gotoff_lo
2008 // and %reg, %reg, (32)0
2009 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2010 // In order to do so, we need to generate a correctly marked DAG node using
2011 // makeHiLoPair.
2012 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2013 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2014 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2015 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2016 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2019 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2020 MachineBasicBlock::iterator I,
2021 MachineBasicBlock *TargetBB,
2022 const DebugLoc &DL) const {
2023 MachineFunction *MF = MBB.getParent();
2024 MachineRegisterInfo &MRI = MF->getRegInfo();
2025 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2027 const TargetRegisterClass *RC = &VE::I64RegClass;
2028 Register Tmp1 = MRI.createVirtualRegister(RC);
2029 Register Tmp2 = MRI.createVirtualRegister(RC);
2030 Register Result = MRI.createVirtualRegister(RC);
2032 if (isPositionIndependent()) {
2033 // Create following instructions for local linkage PIC code.
2034 // lea %Tmp1, TargetBB@gotoff_lo
2035 // and %Tmp2, %Tmp1, (32)0
2036 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2037 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2038 .addImm(0)
2039 .addImm(0)
2040 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2041 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2042 .addReg(Tmp1, getKillRegState(true))
2043 .addImm(M0(32));
2044 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2045 .addReg(VE::SX15)
2046 .addReg(Tmp2, getKillRegState(true))
2047 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2048 } else {
2049 // Create following instructions for non-PIC code.
2050 // lea %Tmp1, TargetBB@lo
2051 // and %Tmp2, %Tmp1, (32)0
2052 // lea.sl %Result, TargetBB@hi(%Tmp2)
2053 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2054 .addImm(0)
2055 .addImm(0)
2056 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2057 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2058 .addReg(Tmp1, getKillRegState(true))
2059 .addImm(M0(32));
2060 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2061 .addReg(Tmp2, getKillRegState(true))
2062 .addImm(0)
2063 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2065 return Result;
2068 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2069 MachineBasicBlock::iterator I,
2070 StringRef Symbol, const DebugLoc &DL,
2071 bool IsLocal = false,
2072 bool IsCall = false) const {
2073 MachineFunction *MF = MBB.getParent();
2074 MachineRegisterInfo &MRI = MF->getRegInfo();
2075 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2077 const TargetRegisterClass *RC = &VE::I64RegClass;
2078 Register Result = MRI.createVirtualRegister(RC);
2080 if (isPositionIndependent()) {
2081 if (IsCall && !IsLocal) {
2082 // Create following instructions for non-local linkage PIC code function
2083 // calls. These instructions use IC and the magic number -24, so we expand
2084 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2085 // lea %Reg, Symbol@plt_lo(-24)
2086 // and %Reg, %Reg, (32)0
2087 // sic %s16
2088 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2089 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2090 .addExternalSymbol("abort");
2091 } else if (IsLocal) {
2092 Register Tmp1 = MRI.createVirtualRegister(RC);
2093 Register Tmp2 = MRI.createVirtualRegister(RC);
2094 // Create following instructions for local linkage PIC code.
2095 // lea %Tmp1, Symbol@gotoff_lo
2096 // and %Tmp2, %Tmp1, (32)0
2097 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2098 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2099 .addImm(0)
2100 .addImm(0)
2101 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2102 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2103 .addReg(Tmp1, getKillRegState(true))
2104 .addImm(M0(32));
2105 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2106 .addReg(VE::SX15)
2107 .addReg(Tmp2, getKillRegState(true))
2108 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2109 } else {
2110 Register Tmp1 = MRI.createVirtualRegister(RC);
2111 Register Tmp2 = MRI.createVirtualRegister(RC);
2112 // Create following instructions for non-local linkage PIC code.
2113 // lea %Tmp1, Symbol@got_lo
2114 // and %Tmp2, %Tmp1, (32)0
2115 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
2116 // ld %Result, 0(%Tmp3)
2117 Register Tmp3 = MRI.createVirtualRegister(RC);
2118 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2119 .addImm(0)
2120 .addImm(0)
2121 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2122 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2123 .addReg(Tmp1, getKillRegState(true))
2124 .addImm(M0(32));
2125 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2126 .addReg(VE::SX15)
2127 .addReg(Tmp2, getKillRegState(true))
2128 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2129 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2130 .addReg(Tmp3, getKillRegState(true))
2131 .addImm(0)
2132 .addImm(0);
2134 } else {
2135 Register Tmp1 = MRI.createVirtualRegister(RC);
2136 Register Tmp2 = MRI.createVirtualRegister(RC);
2137 // Create following instructions for non-PIC code.
2138 // lea %Tmp1, Symbol@lo
2139 // and %Tmp2, %Tmp1, (32)0
2140 // lea.sl %Result, Symbol@hi(%Tmp2)
2141 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2142 .addImm(0)
2143 .addImm(0)
2144 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2145 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2146 .addReg(Tmp1, getKillRegState(true))
2147 .addImm(M0(32));
2148 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2149 .addReg(Tmp2, getKillRegState(true))
2150 .addImm(0)
2151 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2153 return Result;
2156 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2157 MachineBasicBlock *MBB,
2158 MachineBasicBlock *DispatchBB,
2159 int FI, int Offset) const {
2160 DebugLoc DL = MI.getDebugLoc();
2161 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2163 Register LabelReg =
2164 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2166 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
2167 // next IC referenced by longjmp (throw) later.
2168 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2169 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2170 MIB.addReg(LabelReg, getKillRegState(true));
2173 MachineBasicBlock *
2174 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
2175 MachineBasicBlock *MBB) const {
2176 DebugLoc DL = MI.getDebugLoc();
2177 MachineFunction *MF = MBB->getParent();
2178 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2179 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2180 MachineRegisterInfo &MRI = MF->getRegInfo();
2182 const BasicBlock *BB = MBB->getBasicBlock();
2183 MachineFunction::iterator I = ++MBB->getIterator();
2185 // Memory Reference.
2186 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2187 Register BufReg = MI.getOperand(1).getReg();
2189 Register DstReg;
2191 DstReg = MI.getOperand(0).getReg();
2192 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2193 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
2194 (void)TRI;
2195 Register MainDestReg = MRI.createVirtualRegister(RC);
2196 Register RestoreDestReg = MRI.createVirtualRegister(RC);
2198 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2199 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2201 // ThisMBB:
2202 // buf[3] = %s17 iff %s17 is used as BP
2203 // buf[1] = RestoreMBB as IC after longjmp
2204 // # SjLjSetup RestoreMBB
2206 // MainMBB:
2207 // v_main = 0
2209 // SinkMBB:
2210 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2211 // ...
2213 // RestoreMBB:
2214 // %s17 = buf[3] iff %s17 is used as BP
2215 // v_restore = 1
2216 // goto SinkMBB
2218 MachineBasicBlock *ThisMBB = MBB;
2219 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2220 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2221 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2222 MF->insert(I, MainMBB);
2223 MF->insert(I, SinkMBB);
2224 MF->push_back(RestoreMBB);
2225 RestoreMBB->setMachineBlockAddressTaken();
2227 // Transfer the remainder of BB and its successor edges to SinkMBB.
2228 SinkMBB->splice(SinkMBB->begin(), MBB,
2229 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2230 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2232 // ThisMBB:
2233 Register LabelReg =
2234 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2236 // Store BP in buf[3] iff this function is using BP.
2237 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2238 if (TFI->hasBP(*MF)) {
2239 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2240 MIB.addReg(BufReg);
2241 MIB.addImm(0);
2242 MIB.addImm(24);
2243 MIB.addReg(VE::SX17);
2244 MIB.setMemRefs(MMOs);
2247 // Store IP in buf[1].
2248 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2249 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2250 MIB.addImm(0);
2251 MIB.addImm(8);
2252 MIB.addReg(LabelReg, getKillRegState(true));
2253 MIB.setMemRefs(MMOs);
2255 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2257 // Insert setup.
2258 MIB =
2259 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2261 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2262 MIB.addRegMask(RegInfo->getNoPreservedMask());
2263 ThisMBB->addSuccessor(MainMBB);
2264 ThisMBB->addSuccessor(RestoreMBB);
2266 // MainMBB:
2267 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2268 .addImm(0)
2269 .addImm(0)
2270 .addImm(0);
2271 MainMBB->addSuccessor(SinkMBB);
2273 // SinkMBB:
2274 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2275 .addReg(MainDestReg)
2276 .addMBB(MainMBB)
2277 .addReg(RestoreDestReg)
2278 .addMBB(RestoreMBB);
2280 // RestoreMBB:
2281 // Restore BP from buf[3] iff this function is using BP. The address of
2282 // buf is in SX10.
2283 // FIXME: Better to not use SX10 here
2284 if (TFI->hasBP(*MF)) {
2285 MachineInstrBuilder MIB =
2286 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2287 MIB.addReg(VE::SX10);
2288 MIB.addImm(0);
2289 MIB.addImm(24);
2290 MIB.setMemRefs(MMOs);
2292 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2293 .addImm(0)
2294 .addImm(0)
2295 .addImm(1);
2296 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2297 RestoreMBB->addSuccessor(SinkMBB);
2299 MI.eraseFromParent();
2300 return SinkMBB;
2303 MachineBasicBlock *
2304 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2305 MachineBasicBlock *MBB) const {
2306 DebugLoc DL = MI.getDebugLoc();
2307 MachineFunction *MF = MBB->getParent();
2308 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2309 MachineRegisterInfo &MRI = MF->getRegInfo();
2311 // Memory Reference.
2312 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands());
2313 Register BufReg = MI.getOperand(0).getReg();
2315 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2316 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2317 Register FP = VE::SX9;
2318 Register SP = VE::SX11;
2320 MachineInstrBuilder MIB;
2322 MachineBasicBlock *ThisMBB = MBB;
2324 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2326 // ThisMBB:
2327 // %fp = load buf[0]
2328 // %jmp = load buf[1]
2329 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2330 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2331 // jmp %jmp
2333 // Reload FP.
2334 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2335 MIB.addReg(BufReg);
2336 MIB.addImm(0);
2337 MIB.addImm(0);
2338 MIB.setMemRefs(MMOs);
2340 // Reload IP.
2341 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2342 MIB.addReg(BufReg);
2343 MIB.addImm(0);
2344 MIB.addImm(8);
2345 MIB.setMemRefs(MMOs);
2347 // Copy BufReg to SX10 for later use in setjmp.
2348 // FIXME: Better to not use SX10 here
2349 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2350 .addReg(BufReg)
2351 .addImm(0);
2353 // Reload SP.
2354 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2355 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2356 MIB.addImm(0);
2357 MIB.addImm(16);
2358 MIB.setMemRefs(MMOs);
2360 // Jump.
2361 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2362 .addReg(Tmp, getKillRegState(true))
2363 .addImm(0);
2365 MI.eraseFromParent();
2366 return ThisMBB;
2369 MachineBasicBlock *
2370 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2371 MachineBasicBlock *BB) const {
2372 DebugLoc DL = MI.getDebugLoc();
2373 MachineFunction *MF = BB->getParent();
2374 MachineFrameInfo &MFI = MF->getFrameInfo();
2375 MachineRegisterInfo &MRI = MF->getRegInfo();
2376 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2377 int FI = MFI.getFunctionContextIndex();
2379 // Get a mapping of the call site numbers to all of the landing pads they're
2380 // associated with.
2381 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2382 unsigned MaxCSNum = 0;
2383 for (auto &MBB : *MF) {
2384 if (!MBB.isEHPad())
2385 continue;
2387 MCSymbol *Sym = nullptr;
2388 for (const auto &MI : MBB) {
2389 if (MI.isDebugInstr())
2390 continue;
2392 assert(MI.isEHLabel() && "expected EH_LABEL");
2393 Sym = MI.getOperand(0).getMCSymbol();
2394 break;
2397 if (!MF->hasCallSiteLandingPad(Sym))
2398 continue;
2400 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2401 CallSiteNumToLPad[CSI].push_back(&MBB);
2402 MaxCSNum = std::max(MaxCSNum, CSI);
2406 // Get an ordered list of the machine basic blocks for the jump table.
2407 std::vector<MachineBasicBlock *> LPadList;
2408 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2409 LPadList.reserve(CallSiteNumToLPad.size());
2411 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2412 for (auto &LP : CallSiteNumToLPad[CSI]) {
2413 LPadList.push_back(LP);
2414 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2418 assert(!LPadList.empty() &&
2419 "No landing pad destinations for the dispatch jump table!");
2421 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2422 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2424 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2425 // First `i64` is callsite, so callsite is FI+8.
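// Illustrative derivation of the offsets used below:
//   i8* (8) + i64 (8) + [4 x i64] (32) + i8* (8) + i8* (8) = 64 bytes,
//   so jmpbuf[0] is FI+64 and jmpbuf[1] is FI+72; the callsite i64 is FI+8.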
2426 static const int OffsetIC = 72;
2427 static const int OffsetCS = 8;
2429 // Create the MBBs for the dispatch code like the following:
2431 // ThisMBB:
2432 // Prepare DispatchBB address and store it to buf[1].
2433 // ...
2435 // DispatchBB:
2436 // %s15 = GETGOT iff isPositionIndependent
2437 // %callsite = load callsite
2438 // brgt.l.t #size of callsites, %callsite, DispContBB
2440 // TrapBB:
2441 // Call abort.
2443 // DispContBB:
2444 // %breg = address of jump table
2445 // %pc = load and calculate next pc from %breg and %callsite
2446 // jmp %pc
2448 // Shove the dispatch's address into the return slot in the function context.
2449 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2450 DispatchBB->setIsEHPad(true);
2452 // TrapBB will cause a trap like `assert(0)`.
2453 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2454 DispatchBB->addSuccessor(TrapBB);
2456 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2457 DispatchBB->addSuccessor(DispContBB);
2459 // Insert MBBs.
2460 MF->push_back(DispatchBB);
2461 MF->push_back(DispContBB);
2462 MF->push_back(TrapBB);
2464 // Insert code to call abort in the TrapBB.
2465 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2466 /* Local */ false, /* Call */ true);
2467 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2468 .addReg(Abort, getKillRegState(true))
2469 .addImm(0)
2470 .addImm(0);
2472 // Insert code into the entry block that creates and registers the function
2473 // context.
2474 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2476 // Create the jump table and associated information
2477 unsigned JTE = getJumpTableEncoding();
2478 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2479 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2481 const VERegisterInfo &RI = TII->getRegisterInfo();
2482 // Add a register mask with no preserved registers. This results in all
2483 // registers being marked as clobbered.
2484 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2485 .addRegMask(RI.getNoPreservedMask());
2487 if (isPositionIndependent()) {
2488 // Force generation of GETGOT, since the current implementation doesn't store
2489 // the GOT register.
2490 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2493 // IReg is used as an index in a memory operand and therefore can't be SP
2494 const TargetRegisterClass *RC = &VE::I64RegClass;
2495 Register IReg = MRI.createVirtualRegister(RC);
2496 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2497 OffsetCS);
2498 if (LPadList.size() < 64) {
2499 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2500 .addImm(VECC::CC_ILE)
2501 .addImm(LPadList.size())
2502 .addReg(IReg)
2503 .addMBB(TrapBB);
2504 } else {
2505 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2506 Register TmpReg = MRI.createVirtualRegister(RC);
2507 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2508 .addImm(0)
2509 .addImm(0)
2510 .addImm(LPadList.size());
2511 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2512 .addImm(VECC::CC_ILE)
2513 .addReg(TmpReg, getKillRegState(true))
2514 .addReg(IReg)
2515 .addMBB(TrapBB);
2518 Register BReg = MRI.createVirtualRegister(RC);
2519 Register Tmp1 = MRI.createVirtualRegister(RC);
2520 Register Tmp2 = MRI.createVirtualRegister(RC);
2522 if (isPositionIndependent()) {
2523 // Create following instructions for local linkage PIC code.
2524 // lea %Tmp1, .LJTI0_0@gotoff_lo
2525 // and %Tmp2, %Tmp1, (32)0
2526 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2527 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2528 .addImm(0)
2529 .addImm(0)
2530 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2531 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2532 .addReg(Tmp1, getKillRegState(true))
2533 .addImm(M0(32));
2534 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2535 .addReg(VE::SX15)
2536 .addReg(Tmp2, getKillRegState(true))
2537 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2538 } else {
2539 // Create following instructions for non-PIC code.
2540 // lea %Tmp1, .LJTI0_0@lo
2541 // and %Tmp2, %Tmp1, (32)0
2542 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2543 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2544 .addImm(0)
2545 .addImm(0)
2546 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2547 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2548 .addReg(Tmp1, getKillRegState(true))
2549 .addImm(M0(32));
2550 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2551 .addReg(Tmp2, getKillRegState(true))
2552 .addImm(0)
2553 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2556 switch (JTE) {
2557 case MachineJumpTableInfo::EK_BlockAddress: {
2558 // Generate simple block address code for no-PIC model.
2559 // sll %Tmp1, %IReg, 3
2560 // lds %TReg, 0(%Tmp1, %BReg)
2561 // bcfla %TReg
2563 Register TReg = MRI.createVirtualRegister(RC);
2564 Register Tmp1 = MRI.createVirtualRegister(RC);
2566 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2567 .addReg(IReg, getKillRegState(true))
2568 .addImm(3);
2569 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2570 .addReg(BReg, getKillRegState(true))
2571 .addReg(Tmp1, getKillRegState(true))
2572 .addImm(0);
2573 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2574 .addReg(TReg, getKillRegState(true))
2575 .addImm(0);
2576 break;
2578 case MachineJumpTableInfo::EK_Custom32: {
2579 // Generate block address code using differences from the function pointer
2580 // for PIC model.
2581 // sll %Tmp1, %IReg, 2
2582 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2583 // Prepare function address in BReg2.
2584 // adds.l %TReg, %BReg2, %OReg
2585 // bcfla %TReg
2587 assert(isPositionIndependent());
2588 Register OReg = MRI.createVirtualRegister(RC);
2589 Register TReg = MRI.createVirtualRegister(RC);
2590 Register Tmp1 = MRI.createVirtualRegister(RC);
2592 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2593 .addReg(IReg, getKillRegState(true))
2594 .addImm(2);
2595 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2596 .addReg(BReg, getKillRegState(true))
2597 .addReg(Tmp1, getKillRegState(true))
2598 .addImm(0);
2599 Register BReg2 =
2600 prepareSymbol(*DispContBB, DispContBB->end(),
2601 DispContBB->getParent()->getName(), DL, /* Local */ true);
2602 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2603 .addReg(OReg, getKillRegState(true))
2604 .addReg(BReg2, getKillRegState(true));
2605 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2606 .addReg(TReg, getKillRegState(true))
2607 .addImm(0);
2608 break;
2610 default:
2611 llvm_unreachable("Unexpected jump table encoding");
2614 // Add the jump table entries as successors to the MBB.
2615 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2616 for (auto &LP : LPadList)
2617 if (SeenMBBs.insert(LP).second)
2618 DispContBB->addSuccessor(LP);
2620 // N.B. the order the invoke BBs are processed in doesn't matter here.
2621 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2622 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2623 for (MachineBasicBlock *MBB : InvokeBBs) {
2624 // Remove the landing pad successor from the invoke block and replace it
2625 // with the new dispatch block.
2626 // Keep a copy of Successors since it's modified inside the loop.
2627 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2628 MBB->succ_rend());
2629 // FIXME: Avoid quadratic complexity.
2630 for (auto *MBBS : Successors) {
2631 if (MBBS->isEHPad()) {
2632 MBB->removeSuccessor(MBBS);
2633 MBBLPads.push_back(MBBS);
2637 MBB->addSuccessor(DispatchBB);
2639 // Find the invoke call and mark all of the callee-saved registers as
2640 // 'implicit defined' so that they're spilled. This prevents code from
2641 // moving instructions to before the EH block, where they will never be
2642 // executed.
2643 for (auto &II : reverse(*MBB)) {
2644 if (!II.isCall())
2645 continue;
2647 DenseMap<Register, bool> DefRegs;
2648 for (auto &MOp : II.operands())
2649 if (MOp.isReg())
2650 DefRegs[MOp.getReg()] = true;
2652 MachineInstrBuilder MIB(*MF, &II);
2653 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2654 Register Reg = SavedRegs[RI];
2655 if (!DefRegs[Reg])
2656 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2659 break;
2663 // Mark all former landing pads as non-landing pads. The dispatch is the only
2664 // landing pad now.
2665 for (auto &LP : MBBLPads)
2666 LP->setIsEHPad(false);
2668 // The instruction is gone now.
2669 MI.eraseFromParent();
2670 return BB;
2673 MachineBasicBlock *
2674 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2675 MachineBasicBlock *BB) const {
2676 switch (MI.getOpcode()) {
2677 default:
2678 llvm_unreachable("Unknown Custom Instruction!");
2679 case VE::EH_SjLj_LongJmp:
2680 return emitEHSjLjLongJmp(MI, BB);
2681 case VE::EH_SjLj_SetJmp:
2682 return emitEHSjLjSetJmp(MI, BB);
2683 case VE::EH_SjLj_Setup_Dispatch:
2684 return emitSjLjDispatchBlock(MI, BB);
2688 static bool isSimm7(SDValue V) {
2689 EVT VT = V.getValueType();
2690 if (VT.isVector())
2691 return false;
2693 if (VT.isInteger()) {
2694 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2695 return isInt<7>(C->getSExtValue());
2696 } else if (VT.isFloatingPoint()) {
2697 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2698 if (VT == MVT::f32 || VT == MVT::f64) {
2699 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2700 uint64_t Val = Imm.getSExtValue();
2701 if (Imm.getBitWidth() == 32)
2702 Val <<= 32; // Immediate values of float are placed at the higher bits on VE.
2703 return isInt<7>(Val);
2707 return false;
2710 static bool isMImm(SDValue V) {
2711 EVT VT = V.getValueType();
2712 if (VT.isVector())
2713 return false;
2715 if (VT.isInteger()) {
2716 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2717 return isMImmVal(getImmVal(C));
2718 } else if (VT.isFloatingPoint()) {
2719 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2720 if (VT == MVT::f32) {
2721 // Float values are placed at the higher bits, so ignore the lower 32 bits.
2722 return isMImm32Val(getFpImmVal(C) >> 32);
2723 } else if (VT == MVT::f64) {
2724 return isMImmVal(getFpImmVal(C));
2728 return false;
2731 static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2732 if (SrcVT.isFloatingPoint()) {
2733 if (SrcVT == MVT::f128)
2734 return VEISD::CMPQ;
2735 return VEISD::CMPF;
2737 return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2740 static EVT decideCompType(EVT SrcVT) {
2741 if (SrcVT == MVT::f128)
2742 return MVT::f64;
2743 return SrcVT;
2746 static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2747 bool WithCMov) {
2748 if (SrcVT.isFloatingPoint()) {
2749 // For the case of floating point setcc, only unordered comparison
2750 // or general comparison with the -enable-no-nans-fp-math option reaches
2751 // here, so it is safe even if values are NaN. Only f128 is not safe
2752 // since VE uses the f64 result of the f128 comparison.
2753 return SrcVT != MVT::f128;
2755 if (isIntEqualitySetCC(CC)) {
2756 // For the case of equal or not equal, it is safe without comparison with 0.
2757 return true;
2759 if (WithCMov) {
2760 // For the case of integer setcc with cmov, all signed comparisons with 0
2761 // are safe.
2762 return isSignedIntSetCC(CC);
2764 // For the case of integer setcc, only signed 64-bit comparison is safe.
2765 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2766 // less than 0 without CMPU. For 32 bits, the other half of the 32 bits is
2767 // unconditional, so it is not safe without CMPI either.
2768 return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2771 static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2772 ISD::CondCode CC, bool WithCMov,
2773 const SDLoc &DL, SelectionDAG &DAG) {
2774 // Compare values. If RHS is 0 and it is safe to calculate without
2775 // comparison, we don't generate an instruction for comparison.
2776 EVT CompVT = decideCompType(VT);
2777 if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2778 (isNullConstant(RHS) || isNullFPConstant(RHS))) {
2779 return LHS;
2781 return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2784 SDValue VETargetLowering::combineSelect(SDNode *N,
2785 DAGCombinerInfo &DCI) const {
2786 assert(N->getOpcode() == ISD::SELECT &&
2787 "Should be called with a SELECT node");
2788 ISD::CondCode CC = ISD::CondCode::SETNE;
2789 SDValue Cond = N->getOperand(0);
2790 SDValue True = N->getOperand(1);
2791 SDValue False = N->getOperand(2);
2793 // We handle only scalar SELECT.
2794 EVT VT = N->getValueType(0);
2795 if (VT.isVector())
2796 return SDValue();
2798 // Perform combineSelect after legalizing the DAG.
2799 if (!DCI.isAfterLegalizeDAG())
2800 return SDValue();
2802 EVT VT0 = Cond.getValueType();
2803 if (isMImm(True)) {
2804 // VE's condition move can handle MImm in True clause, so nothing to do.
2805 } else if (isMImm(False)) {
2806 // VE's condition move can handle MImm in True clause, so swap True and
2807 // False clauses if False has an MImm value, and update the condition code.
2808 std::swap(True, False);
2809 CC = getSetCCInverse(CC, VT0);
2812 SDLoc DL(N);
2813 SelectionDAG &DAG = DCI.DAG;
2814 VECC::CondCode VECCVal;
2815 if (VT0.isFloatingPoint()) {
2816 VECCVal = fpCondCode2Fcc(CC);
2817 } else {
2818 VECCVal = intCondCode2Icc(CC);
2820 SDValue Ops[] = {Cond, True, False,
2821 DAG.getConstant(VECCVal, DL, MVT::i32)};
2822 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2825 SDValue VETargetLowering::combineSelectCC(SDNode *N,
2826 DAGCombinerInfo &DCI) const {
2827 assert(N->getOpcode() == ISD::SELECT_CC &&
2828 "Should be called with a SELECT_CC node");
2829 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2830 SDValue LHS = N->getOperand(0);
2831 SDValue RHS = N->getOperand(1);
2832 SDValue True = N->getOperand(2);
2833 SDValue False = N->getOperand(3);
2835 // We handle only scalar SELECT_CC.
2836 EVT VT = N->getValueType(0);
2837 if (VT.isVector())
2838 return SDValue();
2840 // Perform combineSelectCC after legalizing the DAG.
2841 if (!DCI.isAfterLegalizeDAG())
2842 return SDValue();
2844 // We handle only i32/i64/f32/f64/f128 comparisons.
2845 EVT LHSVT = LHS.getValueType();
2846 assert(LHSVT == RHS.getValueType());
2847 switch (LHSVT.getSimpleVT().SimpleTy) {
2848 case MVT::i32:
2849 case MVT::i64:
2850 case MVT::f32:
2851 case MVT::f64:
2852 case MVT::f128:
2853 break;
2854 default:
2855 // Return SDValue to let llvm handle other types.
2856 return SDValue();
2859 if (isMImm(RHS)) {
2860 // VE's comparison can handle MImm in RHS, so nothing to do.
2861 } else if (isSimm7(RHS)) {
2862 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2863 // update condition code.
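// Illustrative example: (select_cc %x, 3, t, f, setgt) becomes
// (select_cc 3, %x, t, f, setlt), so the simm7 value 3 ends up in LHS.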
2864 std::swap(LHS, RHS);
2865 CC = getSetCCSwappedOperands(CC);
2867 if (isMImm(True)) {
2868 // VE's condition move can handle MImm in True clause, so nothing to do.
2869 } else if (isMImm(False)) {
2870 // VE's condition move can handle MImm in True clause, so swap True and
2871 // False clauses if False has an MImm value, and update the condition code.
2872 std::swap(True, False);
2873 CC = getSetCCInverse(CC, LHSVT);
2876 SDLoc DL(N);
2877 SelectionDAG &DAG = DCI.DAG;
2879 bool WithCMov = true;
2880 SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2882 VECC::CondCode VECCVal;
2883 if (LHSVT.isFloatingPoint()) {
2884 VECCVal = fpCondCode2Fcc(CC);
2885 } else {
2886 VECCVal = intCondCode2Icc(CC);
2888 SDValue Ops[] = {CompNode, True, False,
2889 DAG.getConstant(VECCVal, DL, MVT::i32)};
2890 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2893 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2894 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2895 switch (User->getOpcode()) {
2896 default:
2897 return false;
2898 case ISD::ADD:
2899 case ISD::SUB:
2900 case ISD::MUL:
2901 case ISD::SDIV:
2902 case ISD::UDIV:
2903 case ISD::SETCC:
2904 case ISD::SMIN:
2905 case ISD::SMAX:
2906 case ISD::SHL:
2907 case ISD::SRA:
2908 case ISD::BSWAP:
2909 case ISD::SINT_TO_FP:
2910 case ISD::UINT_TO_FP:
2911 case ISD::BR_CC:
2912 case ISD::BITCAST:
2913 case ISD::ATOMIC_CMP_SWAP:
2914 case ISD::ATOMIC_SWAP:
2915 case VEISD::CMPU:
2916 case VEISD::CMPI:
2917 return true;
2918 case ISD::SRL:
2919 if (N->getOperand(0).getOpcode() != ISD::SRL)
2920 return true;
2921 // (srl (trunc (srl ...))) may be optimized by combining srl, so we
2922 // don't optimize the trunc here.
2923 return false;
2924 case ISD::SELECT_CC:
2925 if (User->getOperand(2).getNode() != N &&
2926 User->getOperand(3).getNode() != N)
2927 return true;
2928 return isI32InsnAllUses(User, N);
2929 case VEISD::CMOV:
2930 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2931 // However, trunc in true or false clauses is not safe.
2932 if (User->getOperand(1).getNode() != N &&
2933 User->getOperand(2).getNode() != N &&
2934 isa<ConstantSDNode>(User->getOperand(3))) {
2935 VECC::CondCode VECCVal =
2936 static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
2937 return isIntVECondCode(VECCVal);
2939 [[fallthrough]];
2940 case ISD::AND:
2941 case ISD::OR:
2942 case ISD::XOR:
2943 case ISD::SELECT:
2944 case ISD::CopyToReg:
2945 // Check all uses of selections, bit operations, and copies. If all of them
2946 // are safe, optimize the truncate to extract_subreg.
2947 return isI32InsnAllUses(User, N);
2951 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2952 // Check all uses of the User node. If all of them are safe, optimize
2953 // the truncate to extract_subreg.
2954 for (const SDNode *U : User->users()) {
2955 switch (U->getOpcode()) {
2956 default:
2957 // If the use is an instruction which treats the source operand as i32,
2958 // it is safe to avoid truncate here.
2959 if (isI32Insn(U, N))
2960 continue;
2961 break;
2962 case ISD::ANY_EXTEND:
2963 case ISD::SIGN_EXTEND:
2964 case ISD::ZERO_EXTEND: {
2965 // Special optimizations for the combination of ext and trunc.
2966 // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
2967 // since this truncate instruction clears the higher 32 bits, which are
2968 // filled by one of the ext instructions later.
2969 assert(N->getValueType(0) == MVT::i32 &&
2970 "find truncate to not i32 integer");
2971 if (User->getOpcode() == ISD::SELECT_CC ||
2972 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2973 continue;
2974 break;
2977 return false;
2979 return true;
2982 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2983 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2984 // is sometimes too late. So, we do it here.
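// Illustrative sketch of the rewrite performed below: when every user of
// (i32 (truncate i64 %x)) only reads the low 32 bits, the truncate is
// replaced by (EXTRACT_SUBREG %x, sub_i32) and no instruction is emitted.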
2985 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2986 DAGCombinerInfo &DCI) const {
2987 assert(N->getOpcode() == ISD::TRUNCATE &&
2988 "Should be called with a TRUNCATE node");
2990 SelectionDAG &DAG = DCI.DAG;
2991 SDLoc DL(N);
2992 EVT VT = N->getValueType(0);
2994 // We prefer to do this when all types are legal.
2995 if (!DCI.isAfterLegalizeDAG())
2996 return SDValue();
2998 // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a constant.
2999 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3000 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3001 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3002 return SDValue();
3004 // Check all use of this TRUNCATE.
3005 for (const SDNode *User : N->users()) {
3006 // Make sure that we're not going to replace TRUNCATE for non-i32
3007 // instructions.
3009 // FIXME: Although we could sometimes handle this, and it does occur in
3010 // practice that one of the condition inputs to the select is also one of
3011 // the outputs, we currently can't deal with this.
3012 if (isI32Insn(User, N))
3013 continue;
3015 return SDValue();
3018 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3019 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3020 N->getOperand(0), SubI32),
3024 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3025 DAGCombinerInfo &DCI) const {
3026 switch (N->getOpcode()) {
3027 default:
3028 break;
3029 case ISD::SELECT:
3030 return combineSelect(N, DCI);
3031 case ISD::SELECT_CC:
3032 return combineSelectCC(N, DCI);
3033 case ISD::TRUNCATE:
3034 return combineTRUNCATE(N, DCI);
3037 return SDValue();
3040 //===----------------------------------------------------------------------===//
3041 // VE Inline Assembly Support
3042 //===----------------------------------------------------------------------===//
3044 VETargetLowering::ConstraintType
3045 VETargetLowering::getConstraintType(StringRef Constraint) const {
3046 if (Constraint.size() == 1) {
3047 switch (Constraint[0]) {
3048 default:
3049 break;
3050 case 'v': // vector registers
3051 return C_RegisterClass;
3054 return TargetLowering::getConstraintType(Constraint);
3057 std::pair<unsigned, const TargetRegisterClass *>
3058 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3059 StringRef Constraint,
3060 MVT VT) const {
3061 const TargetRegisterClass *RC = nullptr;
3062 if (Constraint.size() == 1) {
3063 switch (Constraint[0]) {
3064 default:
3065 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3066 case 'r':
3067 RC = &VE::I64RegClass;
3068 break;
3069 case 'v':
3070 RC = &VE::V64RegClass;
3071 break;
3073 return std::make_pair(0U, RC);
3076 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3079 //===----------------------------------------------------------------------===//
3080 // VE Target Optimization Support
3081 //===----------------------------------------------------------------------===//
3083 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3084 // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
3085 if (isJumpTableRelative())
3086 return 8;
3088 return TargetLowering::getMinimumJumpTableEntries();
3091 bool VETargetLowering::hasAndNot(SDValue Y) const {
3092 EVT VT = Y.getValueType();
3094 // VE doesn't have a vector and-not instruction.
3095 if (VT.isVector())
3096 return false;
3098 // VE allows different immediate values for X and Y where ~X & Y.
3099 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3100 // function is used to check whether an immediate value is OK for the and-not
3101 // instruction as both X and Y. Generating an additional instruction to
3102 // retrieve an immediate value is no good since the purpose of this
3103 // function is to convert a series of 3 instructions to another series of
3104 // 3 instructions with better parallelism. Therefore, we return false
3105 // for all immediate values now.
3106 // FIXME: Change hasAndNot function to have two operands to make it work
3107 // correctly with Aurora VE.
3108 if (isa<ConstantSDNode>(Y))
3109 return false;
3111 // It's ok for generic registers.
3112 return true;
3115 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3116 SelectionDAG &DAG) const {
3117 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3118 MVT VT = Op.getOperand(0).getSimpleValueType();
3120 // Special treatment for packed V64 types.
3121 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3122 (void)VT;
3123 // Example of codes:
3124 // %packed_v = extractelt %vr, %idx / 2
3125 // %v = %packed_v >> (%idx % 2 * 32)
3126 // %res = %v & 0xffffffff
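// Worked example (illustrative): for %idx = 5, the packed element index is
// 5 >> 1 = 2 and the shift is ((5 & 1) ^ 1) << 5 = 0, so the low 32 bits of
// packed element 2 are used; for %idx = 4 the shift is 32 (high 32 bits).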
3128 SDValue Vec = Op.getOperand(0);
3129 SDValue Idx = Op.getOperand(1);
3130 SDLoc DL(Op);
3131 SDValue Result = Op;
3132 if (false /* Idx->isConstant() */) {
3133 // TODO: optimized implementation using constant values
3134 } else {
3135 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3136 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3137 SDValue PackedElt =
3138 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3139 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3140 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3141 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3142 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3143 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3144 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3145 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3146 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3147 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3148 MVT::i32, PackedElt, SubI32),
3151 if (Op.getSimpleValueType() == MVT::f32) {
3152 Result = DAG.getBitcast(MVT::f32, Result);
3153 } else {
3154 assert(Op.getSimpleValueType() == MVT::i32);
3157 return Result;
3160 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3161 SelectionDAG &DAG) const {
3162 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3163 MVT VT = Op.getOperand(0).getSimpleValueType();
3165 // Special treatment for packed V64 types.
3166 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3167 (void)VT;
3168 // The v512i32 and v512f32 types start from the upper bits (0..31). These
3169 // "upper bits" require `val << 32` from the C implementation's point of view.
3171 // Example of codes:
3172 // %packed_elt = extractelt %vr, (%idx >> 1)
3173 // %shift = ((%idx & 1) ^ 1) << 5
3174 // %packed_elt &= 0xffffffff00000000 >> shift
3175 // %packed_elt |= (zext %val) << shift
3176 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3178 SDLoc DL(Op);
3179 SDValue Vec = Op.getOperand(0);
3180 SDValue Val = Op.getOperand(1);
3181 SDValue Idx = Op.getOperand(2);
3182 if (Idx.getSimpleValueType() == MVT::i32)
3183 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3184 if (Val.getSimpleValueType() == MVT::f32)
3185 Val = DAG.getBitcast(MVT::i32, Val);
3186 assert(Val.getSimpleValueType() == MVT::i32);
3187 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3189 SDValue Result = Op;
3190 if (false /* Idx->isConstant()*/) {
3191 // TODO: optimized implementation using constant values
3192 } else {
3193 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3194 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3195 SDValue PackedElt =
3196 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3197 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3198 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3199 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3200 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3201 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3202 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3203 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3204 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3205 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3206 Result =
3207 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3208 {HalfIdx, PackedElt, Vec}),
3211 return Result;