llvm/lib/Target/VE/VEISelLowering.cpp

   1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file implements the interfaces that VE uses to lower LLVM code into a
  10 // selection DAG.
  11 //
  12 //===----------------------------------------------------------------------===//
  13
  14 #include "VEISelLowering.h"
  15 #include "MCTargetDesc/VEMCExpr.h"
  16 #include "VECustomDAG.h"
  17 #include "VEInstrBuilder.h"
  18 #include "VEMachineFunctionInfo.h"
  19 #include "VERegisterInfo.h"
  20 #include "VETargetMachine.h"
  21 #include "llvm/ADT/StringSwitch.h"
  22 #include "llvm/CodeGen/CallingConvLower.h"
  23 #include "llvm/CodeGen/MachineFrameInfo.h"
  24 #include "llvm/CodeGen/MachineFunction.h"
  25 #include "llvm/CodeGen/MachineInstrBuilder.h"
  26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
  27 #include "llvm/CodeGen/MachineModuleInfo.h"
  28 #include "llvm/CodeGen/MachineRegisterInfo.h"
  29 #include "llvm/CodeGen/SelectionDAG.h"
  30 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
  31 #include "llvm/IR/DerivedTypes.h"
  32 #include "llvm/IR/Function.h"
  33 #include "llvm/IR/IRBuilder.h"
  34 #include "llvm/IR/Module.h"
  35 #include "llvm/Support/ErrorHandling.h"
  36 #include "llvm/Support/KnownBits.h"
  37 using namespace llvm;
  38
  39 #define DEBUG_TYPE "ve-lower"
  40
  41 //===----------------------------------------------------------------------===//
  42 // Calling Convention Implementation
  43 //===----------------------------------------------------------------------===//
  44
  45 #include "VEGenCallingConv.inc"
  46
  47 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
  48   switch (CallConv) {
  49   default:
  50     return RetCC_VE_C;
  51   case CallingConv::Fast:
  52     return RetCC_VE_Fast;
  53   }
  54 }
  55
  56 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
  57   if (IsVarArg)
  58     return CC_VE2;
  59   switch (CallConv) {
  60   default:
  61     return CC_VE_C;
  62   case CallingConv::Fast:
  63     return CC_VE_Fast;
  64   }
  65 }
  66
  67 bool VETargetLowering::CanLowerReturn(
  68     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
  69     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
  70   CCAssignFn *RetCC = getReturnCC(CallConv);
  71   SmallVector<CCValAssign, 16> RVLocs;
  72   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
  73   return CCInfo.CheckReturn(Outs, RetCC);
  74 }
  75
  76 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
  77                                    MVT::v256f32, MVT::v512f32, MVT::v256f64};
  78
  79 static const MVT AllMaskVTs[] = {MVT::v256i1, MVT::v512i1};
  80
  81 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
  82
  83 void VETargetLowering::initRegisterClasses() {
  84   // Set up the register classes.
  85   addRegisterClass(MVT::i32, &VE::I32RegClass);
  86   addRegisterClass(MVT::i64, &VE::I64RegClass);
  87   addRegisterClass(MVT::f32, &VE::F32RegClass);
  88   addRegisterClass(MVT::f64, &VE::I64RegClass);
  89   addRegisterClass(MVT::f128, &VE::F128RegClass);
  90
  91   if (Subtarget->enableVPU()) {
  92     for (MVT VecVT : AllVectorVTs)
  93       addRegisterClass(VecVT, &VE::V64RegClass);
  94     addRegisterClass(MVT::v256i1, &VE::VMRegClass);
  95     addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
  96   }
  97 }
  98
  99 void VETargetLowering::initSPUActions() {
 100   const auto &TM = getTargetMachine();
 101   /// Load & Store {
 102
 103   // VE doesn't have i1 sign extending load.
 104   for (MVT VT : MVT::integer_valuetypes()) {
 105     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
 106     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
 107     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
 108     setTruncStoreAction(VT, MVT::i1, Expand);
 109   }
 110
 111   // VE doesn't have floating point extload/truncstore, so expand them.
 112   for (MVT FPVT : MVT::fp_valuetypes()) {
 113     for (MVT OtherFPVT : MVT::fp_valuetypes()) {
 114       setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
 115       setTruncStoreAction(FPVT, OtherFPVT, Expand);
 116     }
 117   }
 118
 119   // VE doesn't have fp128 load/store, so expand them in custom lower.
 120   setOperationAction(ISD::LOAD, MVT::f128, Custom);
 121   setOperationAction(ISD::STORE, MVT::f128, Custom);
 122
 123   /// } Load & Store
 124
 125   // Custom legalize address nodes into LO/HI parts.
 126   MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
 127   setOperationAction(ISD::BlockAddress, PtrVT, Custom);
 128   setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
 129   setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
 130   setOperationAction(ISD::ConstantPool, PtrVT, Custom);
 131   setOperationAction(ISD::JumpTable, PtrVT, Custom);
 132
 133   /// VAARG handling {
 134   setOperationAction(ISD::VASTART, MVT::Other, Custom);
 135   // VAARG needs to be lowered to access with 8 bytes alignment.
 136   setOperationAction(ISD::VAARG, MVT::Other, Custom);
 137   // Use the default implementation.
 138   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
 139   setOperationAction(ISD::VAEND, MVT::Other, Expand);
 140   /// } VAARG handling
 141
 142   /// Stack {
 143   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
 144   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
 145
 146   // Use the default implementation.
 147   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
 148   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
 149   /// } Stack
 150
 151   /// Branch {
 152
 153   // VE doesn't have BRCOND
 154   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
 155
 156   // BR_JT is not implemented yet.
 157   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
 158
 159   /// } Branch
 160
 161   /// Int Ops {
 162   for (MVT IntVT : {MVT::i32, MVT::i64}) {
 163     // VE has no REM or DIVREM operations.
 164     setOperationAction(ISD::UREM, IntVT, Expand);
 165     setOperationAction(ISD::SREM, IntVT, Expand);
 166     setOperationAction(ISD::SDIVREM, IntVT, Expand);
 167     setOperationAction(ISD::UDIVREM, IntVT, Expand);
 168
 169     // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
 170     setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
 171     setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
 172     setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
 173
 174     // VE has no MULHU/S or U/SMUL_LOHI operations.
 175     // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
 176     setOperationAction(ISD::MULHU, IntVT, Expand);
 177     setOperationAction(ISD::MULHS, IntVT, Expand);
 178     setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
 179     setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
 180
 181     // VE has no CTTZ, ROTL, ROTR operations.
 182     setOperationAction(ISD::CTTZ, IntVT, Expand);
 183     setOperationAction(ISD::ROTL, IntVT, Expand);
 184     setOperationAction(ISD::ROTR, IntVT, Expand);
 185
 186     // VE has 64 bits instruction which works as i64 BSWAP operation.  This
 187     // instruction works fine as i32 BSWAP operation with an additional
 188     // parameter.  Use isel patterns to lower BSWAP.
 189     setOperationAction(ISD::BSWAP, IntVT, Legal);
 190
 191     // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
 192     // operations.  Use isel patterns for i64, promote for i32.
 193     LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
 194     setOperationAction(ISD::BITREVERSE, IntVT, Act);
 195     setOperationAction(ISD::CTLZ, IntVT, Act);
 196     setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
 197     setOperationAction(ISD::CTPOP, IntVT, Act);
 198
 199     // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
 200     // Use isel patterns for i64, promote for i32.
 201     setOperationAction(ISD::AND, IntVT, Act);
 202     setOperationAction(ISD::OR, IntVT, Act);
 203     setOperationAction(ISD::XOR, IntVT, Act);
 204
 205     // Legal smax and smin
 206     setOperationAction(ISD::SMAX, IntVT, Legal);
 207     setOperationAction(ISD::SMIN, IntVT, Legal);
 208   }
 209   /// } Int Ops
 210
 211   /// Conversion {
 212   // VE doesn't have instructions for fp<->uint, so expand them by llvm
 213   setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
 214   setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
 215   setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
 216   setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
 217
 218   // fp16 not supported
 219   for (MVT FPVT : MVT::fp_valuetypes()) {
 220     setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
 221     setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
 222   }
 223   /// } Conversion
 224
 225   /// Floating-point Ops {
 226   /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
 227   ///       and fcmp.
 228
 229   // VE doesn't have following floating point operations.
 230   for (MVT VT : MVT::fp_valuetypes()) {
 231     setOperationAction(ISD::FNEG, VT, Expand);
 232     setOperationAction(ISD::FREM, VT, Expand);
 233   }
 234
 235   // VE doesn't have fdiv of f128.
 236   setOperationAction(ISD::FDIV, MVT::f128, Expand);
 237
 238   for (MVT FPVT : {MVT::f32, MVT::f64}) {
 239     // f32 and f64 uses ConstantFP.  f128 uses ConstantPool.
 240     setOperationAction(ISD::ConstantFP, FPVT, Legal);
 241   }
 242   /// } Floating-point Ops
 243
 244   /// Floating-point math functions {
 245
 246   // VE doesn't have following floating point math functions.
 247   for (MVT VT : MVT::fp_valuetypes()) {
 248     setOperationAction(ISD::FABS, VT, Expand);
 249     setOperationAction(ISD::FCOPYSIGN, VT, Expand);
 250     setOperationAction(ISD::FCOS, VT, Expand);
 251     setOperationAction(ISD::FMA, VT, Expand);
 252     setOperationAction(ISD::FPOW, VT, Expand);
 253     setOperationAction(ISD::FSIN, VT, Expand);
 254     setOperationAction(ISD::FSQRT, VT, Expand);
 255   }
 256
 257   // VE has single and double FMINNUM and FMAXNUM
 258   for (MVT VT : {MVT::f32, MVT::f64}) {
 259     setOperationAction({ISD::FMAXNUM, ISD::FMINNUM}, VT, Legal);
 260   }
 261
 262   /// } Floating-point math functions
 263
 264   /// Atomic instructions {
 265
 266   setMaxAtomicSizeInBitsSupported(64);
 267   setMinCmpXchgSizeInBits(32);
 268   setSupportsUnalignedAtomics(false);
 269
 270   // Use custom inserter for ATOMIC_FENCE.
 271   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
 272
 273   // Other atomic instructions.
 274   for (MVT VT : MVT::integer_valuetypes()) {
 275     // Support i8/i16 atomic swap.
 276     setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
 277
 278     // FIXME: Support "atmam" instructions.
 279     setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
 280     setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
 281     setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
 282     setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
 283
 284     // VE doesn't have follwing instructions.
 285     setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
 286     setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
 287     setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
 288     setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
 289     setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
 290     setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
 291     setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
 292     setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
 293   }
 294
 295   /// } Atomic instructions
 296
 297   /// SJLJ instructions {
 298   setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
 299   setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
 300   setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
 301   if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
 302     setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
 303   /// } SJLJ instructions
 304
 305   // Intrinsic instructions
 306   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 307 }
 308
 309 void VETargetLowering::initVPUActions() {
 310   for (MVT LegalMaskVT : AllMaskVTs)
 311     setOperationAction(ISD::BUILD_VECTOR, LegalMaskVT, Custom);
 312
 313   for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
 314     setOperationAction(Opc, MVT::v512i1, Custom);
 315
 316   for (MVT LegalVecVT : AllVectorVTs) {
 317     setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
 318     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
 319     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
 320     // Translate all vector instructions with legal element types to VVP_*
 321     // nodes.
 322     // TODO We will custom-widen into VVP_* nodes in the future. While we are
 323     // buildling the infrastructure for this, we only do this for legal vector
 324     // VTs.
 325 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME)                                     \
 326   setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
 327 #define ADD_VVP_OP(VVP_NAME, ISD_NAME)                                         \
 328   setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
 329     setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
 330     setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
 331 #include "VVPNodes.def"
 332   }
 333
 334   for (MVT LegalPackedVT : AllPackedVTs) {
 335     setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
 336     setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
 337   }
 338
 339   // vNt32, vNt64 ops (legal element types)
 340   for (MVT VT : MVT::vector_valuetypes()) {
 341     MVT ElemVT = VT.getVectorElementType();
 342     unsigned ElemBits = ElemVT.getScalarSizeInBits();
 343     if (ElemBits != 32 && ElemBits != 64)
 344       continue;
 345
 346     for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
 347       setOperationAction(MemOpc, VT, Custom);
 348
 349     const ISD::NodeType IntReductionOCs[] = {
 350         ISD::VECREDUCE_ADD,  ISD::VECREDUCE_MUL,  ISD::VECREDUCE_AND,
 351         ISD::VECREDUCE_OR,   ISD::VECREDUCE_XOR,  ISD::VECREDUCE_SMIN,
 352         ISD::VECREDUCE_SMAX, ISD::VECREDUCE_UMIN, ISD::VECREDUCE_UMAX};
 353
 354     for (unsigned IntRedOpc : IntReductionOCs)
 355       setOperationAction(IntRedOpc, VT, Custom);
 356   }
 357
 358   // v256i1 and v512i1 ops
 359   for (MVT MaskVT : AllMaskVTs) {
 360     // Custom lower mask ops
 361     setOperationAction(ISD::STORE, MaskVT, Custom);
 362     setOperationAction(ISD::LOAD, MaskVT, Custom);
 363   }
 364 }
 365
 366 SDValue
 367 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
 368                               bool IsVarArg,
 369                               const SmallVectorImpl<ISD::OutputArg> &Outs,
 370                               const SmallVectorImpl<SDValue> &OutVals,
 371                               const SDLoc &DL, SelectionDAG &DAG) const {
 372   // CCValAssign - represent the assignment of the return value to locations.
 373   SmallVector<CCValAssign, 16> RVLocs;
 374
 375   // CCState - Info about the registers and stack slot.
 376   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
 377                  *DAG.getContext());
 378
 379   // Analyze return values.
 380   CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
 381
 382   SDValue Glue;
 383   SmallVector<SDValue, 4> RetOps(1, Chain);
 384
 385   // Copy the result values into the output registers.
 386   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 387     CCValAssign &VA = RVLocs[i];
 388     assert(VA.isRegLoc() && "Can only return in registers!");
 389     assert(!VA.needsCustom() && "Unexpected custom lowering");
 390     SDValue OutVal = OutVals[i];
 391
 392     // Integer return values must be sign or zero extended by the callee.
 393     switch (VA.getLocInfo()) {
 394     case CCValAssign::Full:
 395       break;
 396     case CCValAssign::SExt:
 397       OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
 398       break;
 399     case CCValAssign::ZExt:
 400       OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
 401       break;
 402     case CCValAssign::AExt:
 403       OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
 404       break;
 405     case CCValAssign::BCvt: {
 406       // Convert a float return value to i64 with padding.
 407       //     63     31   0
 408       //    +------+------+
 409       //    | float|   0  |
 410       //    +------+------+
 411       assert(VA.getLocVT() == MVT::i64);
 412       assert(VA.getValVT() == MVT::f32);
 413       SDValue Undef = SDValue(
 414           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
 415       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
 416       OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
 417                                           MVT::i64, Undef, OutVal, Sub_f32),
 418                        0);
 419       break;
 420     }
 421     default:
 422       llvm_unreachable("Unknown loc info!");
 423     }
 424
 425     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Glue);
 426
 427     // Guarantee that all emitted copies are stuck together with flags.
 428     Glue = Chain.getValue(1);
 429     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
 430   }
 431
 432   RetOps[0] = Chain; // Update chain.
 433
 434   // Add the glue if we have it.
 435   if (Glue.getNode())
 436     RetOps.push_back(Glue);
 437
 438   return DAG.getNode(VEISD::RET_GLUE, DL, MVT::Other, RetOps);
 439 }
 440
 441 SDValue VETargetLowering::LowerFormalArguments(
 442     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
 443     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
 444     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
 445   MachineFunction &MF = DAG.getMachineFunction();
 446
 447   // Get the base offset of the incoming arguments stack space.
 448   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
 449   // Get the size of the preserved arguments area
 450   unsigned ArgsPreserved = 64;
 451
 452   // Analyze arguments according to CC_VE.
 453   SmallVector<CCValAssign, 16> ArgLocs;
 454   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
 455                  *DAG.getContext());
 456   // Allocate the preserved area first.
 457   CCInfo.AllocateStack(ArgsPreserved, Align(8));
 458   // We already allocated the preserved area, so the stack offset computed
 459   // by CC_VE would be correct now.
 460   CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
 461
 462   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
 463     CCValAssign &VA = ArgLocs[i];
 464     assert(!VA.needsCustom() && "Unexpected custom lowering");
 465     if (VA.isRegLoc()) {
 466       // This argument is passed in a register.
 467       // All integer register arguments are promoted by the caller to i64.
 468
 469       // Create a virtual register for the promoted live-in value.
 470       Register VReg =
 471           MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
 472       SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
 473
 474       // The caller promoted the argument, so insert an Assert?ext SDNode so we
 475       // won't promote the value again in this function.
 476       switch (VA.getLocInfo()) {
 477       case CCValAssign::SExt:
 478         Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
 479                           DAG.getValueType(VA.getValVT()));
 480         break;
 481       case CCValAssign::ZExt:
 482         Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
 483                           DAG.getValueType(VA.getValVT()));
 484         break;
 485       case CCValAssign::BCvt: {
 486         // Extract a float argument from i64 with padding.
 487         //     63     31   0
 488         //    +------+------+
 489         //    | float|   0  |
 490         //    +------+------+
 491         assert(VA.getLocVT() == MVT::i64);
 492         assert(VA.getValVT() == MVT::f32);
 493         SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
 494         Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
 495                                          MVT::f32, Arg, Sub_f32),
 496                       0);
 497         break;
 498       }
 499       default:
 500         break;
 501       }
 502
 503       // Truncate the register down to the argument type.
 504       if (VA.isExtInLoc())
 505         Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
 506
 507       InVals.push_back(Arg);
 508       continue;
 509     }
 510
 511     // The registers are exhausted. This argument was passed on the stack.
 512     assert(VA.isMemLoc());
 513     // The CC_VE_Full/Half functions compute stack offsets relative to the
 514     // beginning of the arguments area at %fp + the size of reserved area.
 515     unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
 516     unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
 517
 518     // Adjust offset for a float argument by adding 4 since the argument is
 519     // stored in 8 bytes buffer with offset like below.  LLVM generates
 520     // 4 bytes load instruction, so need to adjust offset here.  This
 521     // adjustment is required in only LowerFormalArguments.  In LowerCall,
 522     // a float argument is converted to i64 first, and stored as 8 bytes
 523     // data, which is required by ABI, so no need for adjustment.
 524     //    0      4
 525     //    +------+------+
 526     //    | empty| float|
 527     //    +------+------+
 528     if (VA.getValVT() == MVT::f32)
 529       Offset += 4;
 530
 531     int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
 532     InVals.push_back(
 533         DAG.getLoad(VA.getValVT(), DL, Chain,
 534                     DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
 535                     MachinePointerInfo::getFixedStack(MF, FI)));
 536   }
 537
 538   if (!IsVarArg)
 539     return Chain;
 540
 541   // This function takes variable arguments, some of which may have been passed
 542   // in registers %s0-%s8.
 543   //
 544   // The va_start intrinsic needs to know the offset to the first variable
 545   // argument.
 546   // TODO: need to calculate offset correctly once we support f128.
 547   unsigned ArgOffset = ArgLocs.size() * 8;
 548   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
 549   // Skip the reserved area at the top of stack.
 550   FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
 551
 552   return Chain;
 553 }
 554
 555 // FIXME? Maybe this could be a TableGen attribute on some registers and
 556 // this table could be generated automatically from RegInfo.
 557 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
 558                                              const MachineFunction &MF) const {
 559   Register Reg = StringSwitch<Register>(RegName)
 560                      .Case("sp", VE::SX11)    // Stack pointer
 561                      .Case("fp", VE::SX9)     // Frame pointer
 562                      .Case("sl", VE::SX8)     // Stack limit
 563                      .Case("lr", VE::SX10)    // Link register
 564                      .Case("tp", VE::SX14)    // Thread pointer
 565                      .Case("outer", VE::SX12) // Outer regiser
 566                      .Case("info", VE::SX17)  // Info area register
 567                      .Case("got", VE::SX15)   // Global offset table register
 568                      .Case("plt", VE::SX16) // Procedure linkage table register
 569                      .Default(0);
 570
 571   if (Reg)
 572     return Reg;
 573
 574   report_fatal_error("Invalid register name global variable");
 575 }
 576
 577 //===----------------------------------------------------------------------===//
 578 // TargetLowering Implementation
 579 //===----------------------------------------------------------------------===//
 580
 581 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
 582                                     SmallVectorImpl<SDValue> &InVals) const {
 583   SelectionDAG &DAG = CLI.DAG;
 584   SDLoc DL = CLI.DL;
 585   SDValue Chain = CLI.Chain;
 586   auto PtrVT = getPointerTy(DAG.getDataLayout());
 587
 588   // VE target does not yet support tail call optimization.
 589   CLI.IsTailCall = false;
 590
 591   // Get the base offset of the outgoing arguments stack space.
 592   unsigned ArgsBaseOffset = Subtarget->getRsaSize();
 593   // Get the size of the preserved arguments area
 594   unsigned ArgsPreserved = 8 * 8u;
 595
 596   // Analyze operands of the call, assigning locations to each operand.
 597   SmallVector<CCValAssign, 16> ArgLocs;
 598   CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
 599                  *DAG.getContext());
 600   // Allocate the preserved area first.
 601   CCInfo.AllocateStack(ArgsPreserved, Align(8));
 602   // We already allocated the preserved area, so the stack offset computed
 603   // by CC_VE would be correct now.
 604   CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
 605
 606   // VE requires to use both register and stack for varargs or no-prototyped
 607   // functions.
 608   bool UseBoth = CLI.IsVarArg;
 609
 610   // Analyze operands again if it is required to store BOTH.
 611   SmallVector<CCValAssign, 16> ArgLocs2;
 612   CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
 613                   ArgLocs2, *DAG.getContext());
 614   if (UseBoth)
 615     CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
 616
 617   // Get the size of the outgoing arguments stack space requirement.
 618   unsigned ArgsSize = CCInfo.getStackSize();
 619
 620   // Keep stack frames 16-byte aligned.
 621   ArgsSize = alignTo(ArgsSize, 16);
 622
 623   // Adjust the stack pointer to make room for the arguments.
 624   // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
 625   // with more than 6 arguments.
 626   Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
 627
 628   // Collect the set of registers to pass to the function and their values.
 629   // This will be emitted as a sequence of CopyToReg nodes glued to the call
 630   // instruction.
 631   SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
 632
 633   // Collect chains from all the memory opeations that copy arguments to the
 634   // stack. They must follow the stack pointer adjustment above and precede the
 635   // call instruction itself.
 636   SmallVector<SDValue, 8> MemOpChains;
 637
 638   // VE needs to get address of callee function in a register
 639   // So, prepare to copy it to SX12 here.
 640
 641   // If the callee is a GlobalAddress node (quite common, every direct call is)
 642   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
 643   // Likewise ExternalSymbol -> TargetExternalSymbol.
 644   SDValue Callee = CLI.Callee;
 645
 646   bool IsPICCall = isPositionIndependent();
 647
 648   // PC-relative references to external symbols should go through $stub.
 649   // If so, we need to prepare GlobalBaseReg first.
 650   const TargetMachine &TM = DAG.getTarget();
 651   const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
 652   const GlobalValue *GV = nullptr;
 653   auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
 654   if (CalleeG)
 655     GV = CalleeG->getGlobal();
 656   bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
 657   bool UsePlt = !Local;
 658   MachineFunction &MF = DAG.getMachineFunction();
 659
 660   // Turn GlobalAddress/ExternalSymbol node into a value node
 661   // containing the address of them here.
 662   if (CalleeG) {
 663     if (IsPICCall) {
 664       if (UsePlt)
 665         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
 666       Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
 667       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
 668     } else {
 669       Callee =
 670           makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
 671     }
 672   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
 673     if (IsPICCall) {
 674       if (UsePlt)
 675         Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
 676       Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
 677       Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
 678     } else {
 679       Callee =
 680           makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
 681     }
 682   }
 683
 684   RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
 685
 686   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
 687     CCValAssign &VA = ArgLocs[i];
 688     SDValue Arg = CLI.OutVals[i];
 689
 690     // Promote the value if needed.
 691     switch (VA.getLocInfo()) {
 692     default:
 693       llvm_unreachable("Unknown location info!");
 694     case CCValAssign::Full:
 695       break;
 696     case CCValAssign::SExt:
 697       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
 698       break;
 699     case CCValAssign::ZExt:
 700       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
 701       break;
 702     case CCValAssign::AExt:
 703       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
 704       break;
 705     case CCValAssign::BCvt: {
 706       // Convert a float argument to i64 with padding.
 707       //     63     31   0
 708       //    +------+------+
 709       //    | float|   0  |
 710       //    +------+------+
 711       assert(VA.getLocVT() == MVT::i64);
 712       assert(VA.getValVT() == MVT::f32);
 713       SDValue Undef = SDValue(
 714           DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
 715       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
 716       Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
 717                                        MVT::i64, Undef, Arg, Sub_f32),
 718                     0);
 719       break;
 720     }
 721     }
 722
 723     if (VA.isRegLoc()) {
 724       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
 725       if (!UseBoth)
 726         continue;
 727       VA = ArgLocs2[i];
 728     }
 729
 730     assert(VA.isMemLoc());
 731
 732     // Create a store off the stack pointer for this argument.
 733     SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
 734     // The argument area starts at %fp/%sp + the size of reserved area.
 735     SDValue PtrOff =
 736         DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
 737     PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
 738     MemOpChains.push_back(
 739         DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
 740   }
 741
 742   // Emit all stores, make sure they occur before the call.
 743   if (!MemOpChains.empty())
 744     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
 745
 746   // Build a sequence of CopyToReg nodes glued together with token chain and
 747   // glue operands which copy the outgoing args into registers. The InGlue is
 748   // necessary since all emitted instructions must be stuck together in order
 749   // to pass the live physical registers.
 750   SDValue InGlue;
 751   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
 752     Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
 753                              RegsToPass[i].second, InGlue);
 754     InGlue = Chain.getValue(1);
 755   }
 756
 757   // Build the operands for the call instruction itself.
 758   SmallVector<SDValue, 8> Ops;
 759   Ops.push_back(Chain);
 760   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
 761     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
 762                                   RegsToPass[i].second.getValueType()));
 763
 764   // Add a register mask operand representing the call-preserved registers.
 765   const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
 766   const uint32_t *Mask =
 767       TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
 768   assert(Mask && "Missing call preserved mask for calling convention");
 769   Ops.push_back(DAG.getRegisterMask(Mask));
 770
 771   // Make sure the CopyToReg nodes are glued to the call instruction which
 772   // consumes the registers.
 773   if (InGlue.getNode())
 774     Ops.push_back(InGlue);
 775
 776   // Now the call itself.
 777   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 778   Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
 779   InGlue = Chain.getValue(1);
 780
 781   // Revert the stack pointer immediately after the call.
 782   Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
 783   InGlue = Chain.getValue(1);
 784
 785   // Now extract the return values. This is more or less the same as
 786   // LowerFormalArguments.
 787
 788   // Assign locations to each value returned by this call.
 789   SmallVector<CCValAssign, 16> RVLocs;
 790   CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
 791                  *DAG.getContext());
 792
 793   // Set inreg flag manually for codegen generated library calls that
 794   // return float.
 795   if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
 796     CLI.Ins[0].Flags.setInReg();
 797
 798   RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
 799
 800   // Copy all of the result registers out of their specified physreg.
 801   for (unsigned i = 0; i != RVLocs.size(); ++i) {
 802     CCValAssign &VA = RVLocs[i];
 803     assert(!VA.needsCustom() && "Unexpected custom lowering");
 804     Register Reg = VA.getLocReg();
 805
 806     // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
 807     // reside in the same register in the high and low bits. Reuse the
 808     // CopyFromReg previous node to avoid duplicate copies.
 809     SDValue RV;
 810     if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
 811       if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
 812         RV = Chain.getValue(0);
 813
 814     // But usually we'll create a new CopyFromReg for a different register.
 815     if (!RV.getNode()) {
 816       RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
 817       Chain = RV.getValue(1);
 818       InGlue = Chain.getValue(2);
 819     }
 820
 821     // The callee promoted the return value, so insert an Assert?ext SDNode so
 822     // we won't promote the value again in this function.
 823     switch (VA.getLocInfo()) {
 824     case CCValAssign::SExt:
 825       RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
 826                        DAG.getValueType(VA.getValVT()));
 827       break;
 828     case CCValAssign::ZExt:
 829       RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
 830                        DAG.getValueType(VA.getValVT()));
 831       break;
 832     case CCValAssign::BCvt: {
 833       // Extract a float return value from i64 with padding.
 834       //     63     31   0
 835       //    +------+------+
 836       //    | float|   0  |
 837       //    +------+------+
 838       assert(VA.getLocVT() == MVT::i64);
 839       assert(VA.getValVT() == MVT::f32);
 840       SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
 841       RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
 842                                       MVT::f32, RV, Sub_f32),
 843                    0);
 844       break;
 845     }
 846     default:
 847       break;
 848     }
 849
 850     // Truncate the register down to the return value type.
 851     if (VA.isExtInLoc())
 852       RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
 853
 854     InVals.push_back(RV);
 855   }
 856
 857   return Chain;
 858 }
 859
 860 bool VETargetLowering::isOffsetFoldingLegal(
 861     const GlobalAddressSDNode *GA) const {
 862   // VE uses 64 bit addressing, so we need multiple instructions to generate
 863   // an address.  Folding address with offset increases the number of
 864   // instructions, so that we disable it here.  Offsets will be folded in
 865   // the DAG combine later if it worth to do so.
 866   return false;
 867 }
 868
 869 /// isFPImmLegal - Returns true if the target can instruction select the
 870 /// specified FP immediate natively. If false, the legalizer will
 871 /// materialize the FP immediate as a load from a constant pool.
 872 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
 873                                     bool ForCodeSize) const {
 874   return VT == MVT::f32 || VT == MVT::f64;
 875 }
 876
 877 /// Determine if the target supports unaligned memory accesses.
 878 ///
 879 /// This function returns true if the target allows unaligned memory accesses
 880 /// of the specified type in the given address space. If true, it also returns
 881 /// whether the unaligned memory access is "fast" in the last argument by
 882 /// reference. This is used, for example, in situations where an array
 883 /// copy/move/set is converted to a sequence of store operations. Its use
 884 /// helps to ensure that such replacements don't generate code that causes an
 885 /// alignment error (trap) on the target machine.
 886 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
 887                                                       unsigned AddrSpace,
 888                                                       Align A,
 889                                                       MachineMemOperand::Flags,
 890                                                       unsigned *Fast) const {
 891   if (Fast) {
 892     // It's fast anytime on VE
 893     *Fast = 1;
 894   }
 895   return true;
 896 }
 897
 898 VETargetLowering::VETargetLowering(const TargetMachine &TM,
 899                                    const VESubtarget &STI)
 900     : TargetLowering(TM), Subtarget(&STI) {
 901   // Instructions which use registers as conditionals examine all the
 902   // bits (as does the pseudo SELECT_CC expansion). I don't think it
 903   // matters much whether it's ZeroOrOneBooleanContent, or
 904   // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
 905   // former.
 906   setBooleanContents(ZeroOrOneBooleanContent);
 907   setBooleanVectorContents(ZeroOrOneBooleanContent);
 908
 909   initRegisterClasses();
 910   initSPUActions();
 911   initVPUActions();
 912
 913   setStackPointerRegisterToSaveRestore(VE::SX11);
 914
 915   // We have target-specific dag combine patterns for the following nodes:
 916   setTargetDAGCombine(ISD::TRUNCATE);
 917   setTargetDAGCombine(ISD::SELECT);
 918   setTargetDAGCombine(ISD::SELECT_CC);
 919
 920   // Set function alignment to 16 bytes
 921   setMinFunctionAlignment(Align(16));
 922
 923   // VE stores all argument by 8 bytes alignment
 924   setMinStackArgumentAlignment(Align(8));
 925
 926   computeRegisterProperties(Subtarget->getRegisterInfo());
 927 }
 928
 929 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
 930 #define TARGET_NODE_CASE(NAME)                                                 \
 931   case VEISD::NAME:                                                            \
 932     return "VEISD::" #NAME;
 933   switch ((VEISD::NodeType)Opcode) {
 934   case VEISD::FIRST_NUMBER:
 935     break;
 936     TARGET_NODE_CASE(CMPI)
 937     TARGET_NODE_CASE(CMPU)
 938     TARGET_NODE_CASE(CMPF)
 939     TARGET_NODE_CASE(CMPQ)
 940     TARGET_NODE_CASE(CMOV)
 941     TARGET_NODE_CASE(CALL)
 942     TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
 943     TARGET_NODE_CASE(EH_SJLJ_SETJMP)
 944     TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
 945     TARGET_NODE_CASE(GETFUNPLT)
 946     TARGET_NODE_CASE(GETSTACKTOP)
 947     TARGET_NODE_CASE(GETTLSADDR)
 948     TARGET_NODE_CASE(GLOBAL_BASE_REG)
 949     TARGET_NODE_CASE(Hi)
 950     TARGET_NODE_CASE(Lo)
 951     TARGET_NODE_CASE(RET_GLUE)
 952     TARGET_NODE_CASE(TS1AM)
 953     TARGET_NODE_CASE(VEC_UNPACK_LO)
 954     TARGET_NODE_CASE(VEC_UNPACK_HI)
 955     TARGET_NODE_CASE(VEC_PACK)
 956     TARGET_NODE_CASE(VEC_BROADCAST)
 957     TARGET_NODE_CASE(REPL_I32)
 958     TARGET_NODE_CASE(REPL_F32)
 959
 960     TARGET_NODE_CASE(LEGALAVL)
 961
 962     // Register the VVP_* SDNodes.
 963 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
 964 #include "VVPNodes.def"
 965   }
 966 #undef TARGET_NODE_CASE
 967   return nullptr;
 968 }
 969
 970 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
 971                                          EVT VT) const {
 972   return MVT::i32;
 973 }
 974
 975 // Convert to a target node and set target flags.
 976 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
 977                                           SelectionDAG &DAG) const {
 978   if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
 979     return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
 980                                       GA->getValueType(0), GA->getOffset(), TF);
 981
 982   if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
 983     return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
 984                                      0, TF);
 985
 986   if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
 987     return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
 988                                      CP->getAlign(), CP->getOffset(), TF);
 989
 990   if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
 991     return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
 992                                        TF);
 993
 994   if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
 995     return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
 996
 997   llvm_unreachable("Unhandled address SDNode");
 998 }
 999
1000 // Split Op into high and low parts according to HiTF and LoTF.
1001 // Return an ADD node combining the parts.
1002 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1003                                        SelectionDAG &DAG) const {
1004   SDLoc DL(Op);
1005   EVT VT = Op.getValueType();
1006   SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
1007   SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
1008   return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
1009 }
1010
1011 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1012 // or ExternalSymbol SDNode.
1013 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
1014   SDLoc DL(Op);
1015   EVT PtrVT = Op.getValueType();
1016
1017   // Handle PIC mode first. VE needs a got load for every variable!
1018   if (isPositionIndependent()) {
1019     auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1020
1021     if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1022         (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1023       // Create following instructions for local linkage PIC code.
1024       //     lea %reg, label@gotoff_lo
1025       //     and %reg, %reg, (32)0
1026       //     lea.sl %reg, label@gotoff_hi(%reg, %got)
1027       SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1028                                   VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1029       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1030       return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1031     }
1032     // Create following instructions for not local linkage PIC code.
1033     //     lea %reg, label@got_lo
1034     //     and %reg, %reg, (32)0
1035     //     lea.sl %reg, label@got_hi(%reg)
1036     //     ld %reg, (%reg, %got)
1037     SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
1038                                 VEMCExpr::VK_VE_GOT_LO32, DAG);
1039     SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1040     SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1041     return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1042                        MachinePointerInfo::getGOT(DAG.getMachineFunction()));
1043   }
1044
1045   // This is one of the absolute code models.
1046   switch (getTargetMachine().getCodeModel()) {
1047   default:
1048     llvm_unreachable("Unsupported absolute code model");
1049   case CodeModel::Small:
1050   case CodeModel::Medium:
1051   case CodeModel::Large:
1052     // abs64.
1053     return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1054   }
1055 }
1056
1057 /// Custom Lower {
1058
1059 // The mappings for emitLeading/TrailingFence for VE is designed by following
1060 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1061 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1062                                                 Instruction *Inst,
1063                                                 AtomicOrdering Ord) const {
1064   switch (Ord) {
1065   case AtomicOrdering::NotAtomic:
1066   case AtomicOrdering::Unordered:
1067     llvm_unreachable("Invalid fence: unordered/non-atomic");
1068   case AtomicOrdering::Monotonic:
1069   case AtomicOrdering::Acquire:
1070     return nullptr; // Nothing to do
1071   case AtomicOrdering::Release:
1072   case AtomicOrdering::AcquireRelease:
1073     return Builder.CreateFence(AtomicOrdering::Release);
1074   case AtomicOrdering::SequentiallyConsistent:
1075     if (!Inst->hasAtomicStore())
1076       return nullptr; // Nothing to do
1077     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1078   }
1079   llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1080 }
1081
1082 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1083                                                  Instruction *Inst,
1084                                                  AtomicOrdering Ord) const {
1085   switch (Ord) {
1086   case AtomicOrdering::NotAtomic:
1087   case AtomicOrdering::Unordered:
1088     llvm_unreachable("Invalid fence: unordered/not-atomic");
1089   case AtomicOrdering::Monotonic:
1090   case AtomicOrdering::Release:
1091     return nullptr; // Nothing to do
1092   case AtomicOrdering::Acquire:
1093   case AtomicOrdering::AcquireRelease:
1094     return Builder.CreateFence(AtomicOrdering::Acquire);
1095   case AtomicOrdering::SequentiallyConsistent:
1096     return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1097   }
1098   llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1099 }
1100
1101 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1102                                             SelectionDAG &DAG) const {
1103   SDLoc DL(Op);
1104   AtomicOrdering FenceOrdering =
1105       static_cast<AtomicOrdering>(Op.getConstantOperandVal(1));
1106   SyncScope::ID FenceSSID =
1107       static_cast<SyncScope::ID>(Op.getConstantOperandVal(2));
1108
1109   // VE uses Release consistency, so need a fence instruction if it is a
1110   // cross-thread fence.
1111   if (FenceSSID == SyncScope::System) {
1112     switch (FenceOrdering) {
1113     case AtomicOrdering::NotAtomic:
1114     case AtomicOrdering::Unordered:
1115     case AtomicOrdering::Monotonic:
1116       // No need to generate fencem instruction here.
1117       break;
1118     case AtomicOrdering::Acquire:
1119       // Generate "fencem 2" as acquire fence.
1120       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1121                                         DAG.getTargetConstant(2, DL, MVT::i32),
1122                                         Op.getOperand(0)),
1123                      0);
1124     case AtomicOrdering::Release:
1125       // Generate "fencem 1" as release fence.
1126       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1127                                         DAG.getTargetConstant(1, DL, MVT::i32),
1128                                         Op.getOperand(0)),
1129                      0);
1130     case AtomicOrdering::AcquireRelease:
1131     case AtomicOrdering::SequentiallyConsistent:
1132       // Generate "fencem 3" as acq_rel and seq_cst fence.
1133       // FIXME: "fencem 3" doesn't wait for PCIe deveices accesses,
1134       //        so  seq_cst may require more instruction for them.
1135       return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1136                                         DAG.getTargetConstant(3, DL, MVT::i32),
1137                                         Op.getOperand(0)),
1138                      0);
1139     }
1140   }
1141
1142   // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1143   return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1144 }
1145
1146 TargetLowering::AtomicExpansionKind
1147 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1148   // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1149   if (AI->getOperation() == AtomicRMWInst::Xchg) {
1150     return AtomicExpansionKind::None;
1151   }
1152   // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1153
1154   // Otherwise, expand it using compare and exchange instruction to not call
1155   // __sync_fetch_and_* functions.
1156   return AtomicExpansionKind::CmpXChg;
1157 }
1158
1159 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1160                             SDValue &Bits) {
1161   SDLoc DL(Op);
1162   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1163   SDValue Ptr = N->getOperand(1);
1164   SDValue Val = N->getOperand(2);
1165   EVT PtrVT = Ptr.getValueType();
1166   bool Byte = N->getMemoryVT() == MVT::i8;
1167   //   Remainder = AND Ptr, 3
1168   //   Flag = 1 << Remainder  ; If Byte is true (1 byte swap flag)
1169   //   Flag = 3 << Remainder  ; If Byte is false (2 bytes swap flag)
1170   //   Bits = Remainder << 3
1171   //   NewVal = Val << Bits
1172   SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1173   SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1174   SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1175                       : DAG.getConstant(3, DL, MVT::i32);
1176   Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1177   Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1178   return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1179 }
1180
1181 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1182                              SDValue Bits) {
1183   SDLoc DL(Op);
1184   EVT VT = Data.getValueType();
1185   bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1186   //   NewData = Data >> Bits
1187   //   Result = NewData & 0xff   ; If Byte is true (1 byte)
1188   //   Result = NewData & 0xffff ; If Byte is false (2 bytes)
1189
1190   SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1191   return DAG.getNode(ISD::AND, DL, VT,
1192                      {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1193 }
1194
1195 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1196                                            SelectionDAG &DAG) const {
1197   SDLoc DL(Op);
1198   AtomicSDNode *N = cast<AtomicSDNode>(Op);
1199
1200   if (N->getMemoryVT() == MVT::i8) {
1201     // For i8, use "ts1am"
1202     //   Input:
1203     //     ATOMIC_SWAP Ptr, Val, Order
1204     //
1205     //   Output:
1206     //     Remainder = AND Ptr, 3
1207     //     Flag = 1 << Remainder   ; 1 byte swap flag for TS1AM inst.
1208     //     Bits = Remainder << 3
1209     //     NewVal = Val << Bits
1210     //
1211     //     Aligned = AND Ptr, -4
1212     //     Data = TS1AM Aligned, Flag, NewVal
1213     //
1214     //     NewData = Data >> Bits
1215     //     Result = NewData & 0xff ; 1 byte result
1216     SDValue Flag;
1217     SDValue Bits;
1218     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1219
1220     SDValue Ptr = N->getOperand(1);
1221     SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1222                                   {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1223     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1224                                   DAG.getVTList(Op.getNode()->getValueType(0),
1225                                                 Op.getNode()->getValueType(1)),
1226                                   {N->getChain(), Aligned, Flag, NewVal},
1227                                   N->getMemOperand());
1228
1229     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1230     SDValue Chain = TS1AM.getValue(1);
1231     return DAG.getMergeValues({Result, Chain}, DL);
1232   }
1233   if (N->getMemoryVT() == MVT::i16) {
1234     // For i16, use "ts1am"
1235     SDValue Flag;
1236     SDValue Bits;
1237     SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1238
1239     SDValue Ptr = N->getOperand(1);
1240     SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1241                                   {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1242     SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1243                                   DAG.getVTList(Op.getNode()->getValueType(0),
1244                                                 Op.getNode()->getValueType(1)),
1245                                   {N->getChain(), Aligned, Flag, NewVal},
1246                                   N->getMemOperand());
1247
1248     SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1249     SDValue Chain = TS1AM.getValue(1);
1250     return DAG.getMergeValues({Result, Chain}, DL);
1251   }
1252   // Otherwise, let llvm legalize it.
1253   return Op;
1254 }
1255
1256 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1257                                              SelectionDAG &DAG) const {
1258   return makeAddress(Op, DAG);
1259 }
1260
1261 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1262                                             SelectionDAG &DAG) const {
1263   return makeAddress(Op, DAG);
1264 }
1265
1266 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1267                                             SelectionDAG &DAG) const {
1268   return makeAddress(Op, DAG);
1269 }
1270
1271 SDValue
1272 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1273                                                 SelectionDAG &DAG) const {
1274   SDLoc DL(Op);
1275
1276   // Generate the following code:
1277   //   t1: ch,glue = callseq_start t0, 0, 0
1278   //   t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1279   //   t3: ch,glue = callseq_end t2, 0, 0, t2:2
1280   //   t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1281   SDValue Label = withTargetFlags(Op, 0, DAG);
1282   EVT PtrVT = Op.getValueType();
1283
1284   // Lowering the machine isd will make sure everything is in the right
1285   // location.
1286   SDValue Chain = DAG.getEntryNode();
1287   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1288   const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1289       DAG.getMachineFunction(), CallingConv::C);
1290   Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1291   SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1292   Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1293   Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
1294   Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1295
1296   // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1297   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1298   MFI.setHasCalls(true);
1299
1300   // Also generate code to prepare a GOT register if it is PIC.
1301   if (isPositionIndependent()) {
1302     MachineFunction &MF = DAG.getMachineFunction();
1303     Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1304   }
1305
1306   return Chain;
1307 }
1308
1309 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1310                                                 SelectionDAG &DAG) const {
1311   // The current implementation of nld (2.26) doesn't allow local exec model
1312   // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1313   // generate the general dynamic model code sequence.
1314   //
1315   // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1316   return lowerToTLSGeneralDynamicModel(Op, DAG);
1317 }
1318
1319 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1320   return makeAddress(Op, DAG);
1321 }
1322
1323 // Lower a f128 load into two f64 loads.
1324 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1325   SDLoc DL(Op);
1326   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1327   assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1328   Align Alignment = LdNode->getAlign();
1329   if (Alignment > 8)
1330     Alignment = Align(8);
1331
1332   SDValue Lo64 =
1333       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1334                   LdNode->getPointerInfo(), Alignment,
1335                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1336                                        : MachineMemOperand::MONone);
1337   EVT AddrVT = LdNode->getBasePtr().getValueType();
1338   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1339                               DAG.getConstant(8, DL, AddrVT));
1340   SDValue Hi64 =
1341       DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1342                   LdNode->getPointerInfo(), Alignment,
1343                   LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1344                                        : MachineMemOperand::MONone);
1345
1346   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1347   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1348
1349   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1350   SDNode *InFP128 =
1351       DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1352   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1353                                SDValue(InFP128, 0), Hi64, SubRegEven);
1354   InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1355                                SDValue(InFP128, 0), Lo64, SubRegOdd);
1356   SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1357                           SDValue(Hi64.getNode(), 1)};
1358   SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1359   SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1360   return DAG.getMergeValues(Ops, DL);
1361 }
1362
1363 // Lower a vXi1 load into following instructions
1364 //   LDrii %1, (,%addr)
1365 //   LVMxir  %vm, 0, %1
1366 //   LDrii %2, 8(,%addr)
1367 //   LVMxir  %vm, 0, %2
1368 //   ...
1369 static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG) {
1370   SDLoc DL(Op);
1371   LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1372   assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1373
1374   SDValue BasePtr = LdNode->getBasePtr();
1375   Align Alignment = LdNode->getAlign();
1376   if (Alignment > 8)
1377     Alignment = Align(8);
1378
1379   EVT AddrVT = BasePtr.getValueType();
1380   EVT MemVT = LdNode->getMemoryVT();
1381   if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1382     SDValue OutChains[4];
1383     SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1384     for (int i = 0; i < 4; ++i) {
1385       // Generate load dag and prepare chains.
1386       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1387                                  DAG.getConstant(8 * i, DL, AddrVT));
1388       SDValue Val =
1389           DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1390                       LdNode->getPointerInfo(), Alignment,
1391                       LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1392                                            : MachineMemOperand::MONone);
1393       OutChains[i] = SDValue(Val.getNode(), 1);
1394
1395       VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1396                               DAG.getTargetConstant(i, DL, MVT::i64), Val,
1397                               SDValue(VM, 0));
1398     }
1399     SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1400     SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1401     return DAG.getMergeValues(Ops, DL);
1402   } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1403     SDValue OutChains[8];
1404     SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1405     for (int i = 0; i < 8; ++i) {
1406       // Generate load dag and prepare chains.
1407       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1408                                  DAG.getConstant(8 * i, DL, AddrVT));
1409       SDValue Val =
1410           DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1411                       LdNode->getPointerInfo(), Alignment,
1412                       LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1413                                            : MachineMemOperand::MONone);
1414       OutChains[i] = SDValue(Val.getNode(), 1);
1415
1416       VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1417                               DAG.getTargetConstant(i, DL, MVT::i64), Val,
1418                               SDValue(VM, 0));
1419     }
1420     SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1421     SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1422     return DAG.getMergeValues(Ops, DL);
1423   } else {
1424     // Otherwise, ask llvm to expand it.
1425     return SDValue();
1426   }
1427 }
1428
1429 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1430   LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1431   EVT MemVT = LdNode->getMemoryVT();
1432
1433   // If VPU is enabled, always expand non-mask vector loads to VVP
1434   if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1435     return lowerToVVP(Op, DAG);
1436
1437   SDValue BasePtr = LdNode->getBasePtr();
1438   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1439     // Do not expand store instruction with frame index here because of
1440     // dependency problems.  We expand it later in eliminateFrameIndex().
1441     return Op;
1442   }
1443
1444   if (MemVT == MVT::f128)
1445     return lowerLoadF128(Op, DAG);
1446   if (isMaskType(MemVT))
1447     return lowerLoadI1(Op, DAG);
1448
1449   return Op;
1450 }
1451
1452 // Lower a f128 store into two f64 stores.
1453 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1454   SDLoc DL(Op);
1455   StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1456   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1457
1458   SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1459   SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1460
1461   SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1462                                     StNode->getValue(), SubRegEven);
1463   SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1464                                     StNode->getValue(), SubRegOdd);
1465
1466   Align Alignment = StNode->getAlign();
1467   if (Alignment > 8)
1468     Alignment = Align(8);
1469
1470   // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1471   SDValue OutChains[2];
1472   OutChains[0] =
1473       DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1474                    StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1475                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1476                                         : MachineMemOperand::MONone);
1477   EVT AddrVT = StNode->getBasePtr().getValueType();
1478   SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1479                               DAG.getConstant(8, DL, AddrVT));
1480   OutChains[1] =
1481       DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1482                    MachinePointerInfo(), Alignment,
1483                    StNode->isVolatile() ? MachineMemOperand::MOVolatile
1484                                         : MachineMemOperand::MONone);
1485   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1486 }
1487
1488 // Lower a vXi1 store into following instructions
1489 //   SVMi  %1, %vm, 0
1490 //   STrii %1, (,%addr)
1491 //   SVMi  %2, %vm, 1
1492 //   STrii %2, 8(,%addr)
1493 //   ...
1494 static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG) {
1495   SDLoc DL(Op);
1496   StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1497   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1498
1499   SDValue BasePtr = StNode->getBasePtr();
1500   Align Alignment = StNode->getAlign();
1501   if (Alignment > 8)
1502     Alignment = Align(8);
1503   EVT AddrVT = BasePtr.getValueType();
1504   EVT MemVT = StNode->getMemoryVT();
1505   if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1506     SDValue OutChains[4];
1507     for (int i = 0; i < 4; ++i) {
1508       SDNode *V =
1509           DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1510                              DAG.getTargetConstant(i, DL, MVT::i64));
1511       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1512                                  DAG.getConstant(8 * i, DL, AddrVT));
1513       OutChains[i] =
1514           DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1515                        MachinePointerInfo(), Alignment,
1516                        StNode->isVolatile() ? MachineMemOperand::MOVolatile
1517                                             : MachineMemOperand::MONone);
1518     }
1519     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1520   } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1521     SDValue OutChains[8];
1522     for (int i = 0; i < 8; ++i) {
1523       SDNode *V =
1524           DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1525                              DAG.getTargetConstant(i, DL, MVT::i64));
1526       SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1527                                  DAG.getConstant(8 * i, DL, AddrVT));
1528       OutChains[i] =
1529           DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1530                        MachinePointerInfo(), Alignment,
1531                        StNode->isVolatile() ? MachineMemOperand::MOVolatile
1532                                             : MachineMemOperand::MONone);
1533     }
1534     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1535   } else {
1536     // Otherwise, ask llvm to expand it.
1537     return SDValue();
1538   }
1539 }
1540
1541 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1542   StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1543   assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1544   EVT MemVT = StNode->getMemoryVT();
1545
1546   // If VPU is enabled, always expand non-mask vector stores to VVP
1547   if (Subtarget->enableVPU() && MemVT.isVector() && !isMaskType(MemVT))
1548     return lowerToVVP(Op, DAG);
1549
1550   SDValue BasePtr = StNode->getBasePtr();
1551   if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1552     // Do not expand store instruction with frame index here because of
1553     // dependency problems.  We expand it later in eliminateFrameIndex().
1554     return Op;
1555   }
1556
1557   if (MemVT == MVT::f128)
1558     return lowerStoreF128(Op, DAG);
1559   if (isMaskType(MemVT))
1560     return lowerStoreI1(Op, DAG);
1561
1562   // Otherwise, ask llvm to expand it.
1563   return SDValue();
1564 }
1565
1566 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1567   MachineFunction &MF = DAG.getMachineFunction();
1568   VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1569   auto PtrVT = getPointerTy(DAG.getDataLayout());
1570
1571   // Need frame address to find the address of VarArgsFrameIndex.
1572   MF.getFrameInfo().setFrameAddressIsTaken(true);
1573
1574   // vastart just stores the address of the VarArgsFrameIndex slot into the
1575   // memory location argument.
1576   SDLoc DL(Op);
1577   SDValue Offset =
1578       DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1579                   DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1580   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1581   return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1582                       MachinePointerInfo(SV));
1583 }
1584
1585 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1586   SDNode *Node = Op.getNode();
1587   EVT VT = Node->getValueType(0);
1588   SDValue InChain = Node->getOperand(0);
1589   SDValue VAListPtr = Node->getOperand(1);
1590   EVT PtrVT = VAListPtr.getValueType();
1591   const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1592   SDLoc DL(Node);
1593   SDValue VAList =
1594       DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1595   SDValue Chain = VAList.getValue(1);
1596   SDValue NextPtr;
1597
1598   if (VT == MVT::f128) {
1599     // VE f128 values must be stored with 16 bytes alignment.  We don't
1600     // know the actual alignment of VAList, so we take alignment of it
1601     // dynamically.
1602     int Align = 16;
1603     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1604                          DAG.getConstant(Align - 1, DL, PtrVT));
1605     VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1606                          DAG.getConstant(-Align, DL, PtrVT));
1607     // Increment the pointer, VAList, by 16 to the next vaarg.
1608     NextPtr =
1609         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1610   } else if (VT == MVT::f32) {
1611     // float --> need special handling like below.
1612     //    0      4
1613     //    +------+------+
1614     //    | empty| float|
1615     //    +------+------+
1616     // Increment the pointer, VAList, by 8 to the next vaarg.
1617     NextPtr =
1618         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1619     // Then, adjust VAList.
1620     unsigned InternalOffset = 4;
1621     VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1622                          DAG.getConstant(InternalOffset, DL, PtrVT));
1623   } else {
1624     // Increment the pointer, VAList, by 8 to the next vaarg.
1625     NextPtr =
1626         DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1627   }
1628
1629   // Store the incremented VAList to the legalized pointer.
1630   InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1631
1632   // Load the actual argument out of the pointer VAList.
1633   // We can't count on greater alignment than the word size.
1634   return DAG.getLoad(
1635       VT, DL, InChain, VAList, MachinePointerInfo(),
1636       Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1637 }
1638
1639 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1640                                                   SelectionDAG &DAG) const {
1641   // Generate following code.
1642   //   (void)__llvm_grow_stack(size);
1643   //   ret = GETSTACKTOP;        // pseudo instruction
1644   SDLoc DL(Op);
1645
1646   // Get the inputs.
1647   SDNode *Node = Op.getNode();
1648   SDValue Chain = Op.getOperand(0);
1649   SDValue Size = Op.getOperand(1);
1650   MaybeAlign Alignment(Op.getConstantOperandVal(2));
1651   EVT VT = Node->getValueType(0);
1652
1653   // Chain the dynamic stack allocation so that it doesn't modify the stack
1654   // pointer when other instructions are using the stack.
1655   Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1656
1657   const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1658   Align StackAlign = TFI.getStackAlign();
1659   bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1660
1661   // Prepare arguments
1662   TargetLowering::ArgListTy Args;
1663   TargetLowering::ArgListEntry Entry;
1664   Entry.Node = Size;
1665   Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1666   Args.push_back(Entry);
1667   if (NeedsAlign) {
1668     Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1669     Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1670     Args.push_back(Entry);
1671   }
1672   Type *RetTy = Type::getVoidTy(*DAG.getContext());
1673
1674   EVT PtrVT = Op.getValueType();
1675   SDValue Callee;
1676   if (NeedsAlign) {
1677     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1678   } else {
1679     Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1680   }
1681
1682   TargetLowering::CallLoweringInfo CLI(DAG);
1683   CLI.setDebugLoc(DL)
1684       .setChain(Chain)
1685       .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1686       .setDiscardResult(true);
1687   std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1688   Chain = pair.second;
1689   SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1690   if (NeedsAlign) {
1691     Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1692                          DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1693     Result = DAG.getNode(ISD::AND, DL, VT, Result,
1694                          DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1695   }
1696   //  Chain = Result.getValue(1);
1697   Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1698
1699   SDValue Ops[2] = {Result, Chain};
1700   return DAG.getMergeValues(Ops, DL);
1701 }
1702
1703 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1704                                                SelectionDAG &DAG) const {
1705   SDLoc DL(Op);
1706   return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1707                      Op.getOperand(1));
1708 }
1709
1710 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1711                                               SelectionDAG &DAG) const {
1712   SDLoc DL(Op);
1713   return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1714                      DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1715                      Op.getOperand(1));
1716 }
1717
1718 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1719                                                       SelectionDAG &DAG) const {
1720   SDLoc DL(Op);
1721   return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1722                      Op.getOperand(0));
1723 }
1724
1725 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1726                               const VETargetLowering &TLI,
1727                               const VESubtarget *Subtarget) {
1728   SDLoc DL(Op);
1729   MachineFunction &MF = DAG.getMachineFunction();
1730   EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1731
1732   MachineFrameInfo &MFI = MF.getFrameInfo();
1733   MFI.setFrameAddressIsTaken(true);
1734
1735   unsigned Depth = Op.getConstantOperandVal(0);
1736   const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1737   Register FrameReg = RegInfo->getFrameRegister(MF);
1738   SDValue FrameAddr =
1739       DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1740   while (Depth--)
1741     FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1742                             FrameAddr, MachinePointerInfo());
1743   return FrameAddr;
1744 }
1745
1746 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1747                                const VETargetLowering &TLI,
1748                                const VESubtarget *Subtarget) {
1749   MachineFunction &MF = DAG.getMachineFunction();
1750   MachineFrameInfo &MFI = MF.getFrameInfo();
1751   MFI.setReturnAddressIsTaken(true);
1752
1753   if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1754     return SDValue();
1755
1756   SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1757
1758   SDLoc DL(Op);
1759   EVT VT = Op.getValueType();
1760   SDValue Offset = DAG.getConstant(8, DL, VT);
1761   return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1762                      DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1763                      MachinePointerInfo());
1764 }
1765
1766 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1767                                                   SelectionDAG &DAG) const {
1768   SDLoc DL(Op);
1769   unsigned IntNo = Op.getConstantOperandVal(0);
1770   switch (IntNo) {
1771   default: // Don't custom lower most intrinsics.
1772     return SDValue();
1773   case Intrinsic::eh_sjlj_lsda: {
1774     MachineFunction &MF = DAG.getMachineFunction();
1775     MVT VT = Op.getSimpleValueType();
1776     const VETargetMachine *TM =
1777         static_cast<const VETargetMachine *>(&DAG.getTarget());
1778
1779     // Create GCC_except_tableXX string.  The real symbol for that will be
1780     // generated in EHStreamer::emitExceptionTable() later.  So, we just
1781     // borrow it's name here.
1782     TM->getStrList()->push_back(std::string(
1783         (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1784     SDValue Addr =
1785         DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1786     if (isPositionIndependent()) {
1787       Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1788                           VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1789       SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1790       return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1791     }
1792     return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
1793   }
1794   }
1795 }
1796
1797 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1798   if (!isa<BuildVectorSDNode>(N))
1799     return false;
1800   const auto *BVN = cast<BuildVectorSDNode>(N);
1801
1802   // Find first non-undef insertion.
1803   unsigned Idx;
1804   for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1805     auto ElemV = BVN->getOperand(Idx);
1806     if (!ElemV->isUndef())
1807       break;
1808   }
1809   // Catch the (hypothetical) all-undef case.
1810   if (Idx == BVN->getNumOperands())
1811     return false;
1812   // Remember insertion.
1813   UniqueIdx = Idx++;
1814   // Verify that all other insertions are undef.
1815   for (; Idx < BVN->getNumOperands(); ++Idx) {
1816     auto ElemV = BVN->getOperand(Idx);
1817     if (!ElemV->isUndef())
1818       return false;
1819   }
1820   return true;
1821 }
1822
1823 static SDValue getSplatValue(SDNode *N) {
1824   if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1825     return BuildVec->getSplatValue();
1826   }
1827   return SDValue();
1828 }
1829
1830 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1831                                             SelectionDAG &DAG) const {
1832   VECustomDAG CDAG(DAG, Op);
1833   MVT ResultVT = Op.getSimpleValueType();
1834
1835   // If there is just one element, expand to INSERT_VECTOR_ELT.
1836   unsigned UniqueIdx;
1837   if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1838     SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1839     auto ElemV = Op->getOperand(UniqueIdx);
1840     SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1841     return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1842   }
1843
1844   // Else emit a broadcast.
1845   if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1846     unsigned NumEls = ResultVT.getVectorNumElements();
1847     auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1848     return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1849   }
1850
1851   // Expand
1852   return SDValue();
1853 }
1854
1855 TargetLowering::LegalizeAction
1856 VETargetLowering::getCustomOperationAction(SDNode &Op) const {
1857   // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1858   // these operations (transform nodes such that their AVL parameter refers to
1859   // packs of 64bit, instead of number of elements.
1860
1861   // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1862   // re-visit them.
1863   if (isPackingSupportOpcode(Op.getOpcode()))
1864     return Legal;
1865
1866   // Custom lower to legalize AVL for packed mode.
1867   if (isVVPOrVEC(Op.getOpcode()))
1868     return Custom;
1869   return Legal;
1870 }
1871
/// Entry point for custom lowering: dispatches \p Op to the matching lower*
/// routine based on its opcode.
///
/// The dispatch happens in three stages:
///   1. A switch over scalar (and a few vector element) ISD opcodes.
///   2. Any opcode for which ISD::isVPOpcode() is true goes to lowerToVVP().
///   3. A second switch whose case labels are generated from VVPNodes.def:
///      internal VEC_BROADCAST / VVP_* nodes get their AVL legalized, while
///      MLOAD, MSTORE and the ISD opcodes listed in VVPNodes.def are
///      translated into the VVP layer.
/// Reaching the default of the second switch is a programming error.
SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  LLVM_DEBUG(dbgs() << "::LowerOperation "; Op.dump(&DAG));
  unsigned Opcode = Op.getOpcode();

  /// Scalar isel.
  // Opcodes without a case here fall out of the switch and continue below.
  switch (Opcode) {
  case ISD::ATOMIC_FENCE:
    return lowerATOMIC_FENCE(Op, DAG);
  case ISD::ATOMIC_SWAP:
    return lowerATOMIC_SWAP(Op, DAG);
  case ISD::BlockAddress:
    return lowerBlockAddress(Op, DAG);
  case ISD::ConstantPool:
    return lowerConstantPool(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return lowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:
    return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:
    return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::FRAMEADDR:
    return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
  case ISD::GlobalAddress:
    return lowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:
    return lowerGlobalTLSAddress(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN:
    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::JumpTable:
    return lowerJumpTable(Op, DAG);
  case ISD::LOAD:
    return lowerLOAD(Op, DAG);
  case ISD::RETURNADDR:
    return lowerRETURNADDR(Op, DAG, *this, Subtarget);
  case ISD::BUILD_VECTOR:
    return lowerBUILD_VECTOR(Op, DAG);
  case ISD::STORE:
    return lowerSTORE(Op, DAG);
  case ISD::VASTART:
    return lowerVASTART(Op, DAG);
  case ISD::VAARG:
    return lowerVAARG(Op, DAG);

  case ISD::INSERT_VECTOR_ELT:
    return lowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
  }

  /// Vector isel.
  // Vector-predication (VP) opcodes are always translated to the VVP layer.
  if (ISD::isVPOpcode(Opcode))
    return lowerToVVP(Op, DAG);

  switch (Opcode) {
  default:
    llvm_unreachable("Should not custom lower this!");

  // Legalize the AVL of this internal node.
  // The ADD_VVP_OP definition below expands to one `case VEISD::<name>:` label
  // for each ADD_VVP_OP entry in VVPNodes.def.
  case VEISD::VEC_BROADCAST:
#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
#include "VVPNodes.def"
    // AVL already legalized.
    if (getAnnotatedNodeAVL(Op).second)
      return Op;
    return legalizeInternalVectorOp(Op, DAG);

    // Translate into a VEC_*/VVP_* layer operation.
    // Here ADD_VVP_OP expands to one `case ISD::<name>:` label for each
    // ADD_VVP_OP entry in VVPNodes.def.
  case ISD::MLOAD:
  case ISD::MSTORE:
#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
#include "VVPNodes.def"
    // Mask arithmetic on a packed vector type is split before lowering.
    if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
      return splitMaskArithmetic(Op, DAG);
    return lowerToVVP(Op, DAG);
  }
}
1950 /// } Custom Lower
1951
1952 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1953                                           SmallVectorImpl<SDValue> &Results,
1954                                           SelectionDAG &DAG) const {
1955   switch (N->getOpcode()) {
1956   case ISD::ATOMIC_SWAP:
1957     // Let LLVM expand atomic swap instruction through LowerOperation.
1958     return;
1959   default:
1960     LLVM_DEBUG(N->dumpr(&DAG));
1961     llvm_unreachable("Do not know how to custom type legalize this operation!");
1962   }
1963 }
1964
1965 /// JumpTable for VE.
1966 ///
1967 ///   VE cannot generate relocatable symbol in jump table.  VE cannot
1968 ///   generate expressions using symbols in both text segment and data
1969 ///   segment like below.
1970 ///             .4byte  .LBB0_2-.LJTI0_0
1971 ///   So, we generate offset from the top of function like below as
1972 ///   a custom label.
1973 ///             .4byte  .LBB0_2-<function name>
1974
1975 unsigned VETargetLowering::getJumpTableEncoding() const {
1976   // Use custom label for PIC.
1977   if (isPositionIndependent())
1978     return MachineJumpTableInfo::EK_Custom32;
1979
1980   // Otherwise, use the normal jump table encoding heuristics.
1981   return TargetLowering::getJumpTableEncoding();
1982 }
1983
1984 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1985     const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1986     unsigned Uid, MCContext &Ctx) const {
1987   assert(isPositionIndependent());
1988
1989   // Generate custom label for PIC like below.
1990   //    .4bytes  .LBB0_2-<function name>
1991   const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1992   MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1993   const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1994   return MCBinaryExpr::createSub(Value, Base, Ctx);
1995 }
1996
1997 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1998                                                    SelectionDAG &DAG) const {
1999   assert(isPositionIndependent());
2000   SDLoc DL(Table);
2001   Function *Function = &DAG.getMachineFunction().getFunction();
2002   assert(Function != nullptr);
2003   auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2004
2005   // In the jump table, we have following values in PIC mode.
2006   //    .4bytes  .LBB0_2-<function name>
2007   // We need to add this value and the address of this function to generate
2008   // .LBB0_2 label correctly under PIC mode.  So, we want to generate following
2009   // instructions:
2010   //     lea %reg, fun@gotoff_lo
2011   //     and %reg, %reg, (32)0
2012   //     lea.sl %reg, fun@gotoff_hi(%reg, %got)
2013   // In order to do so, we need to genarate correctly marked DAG node using
2014   // makeHiLoPair.
2015   SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2016   SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
2017                               VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
2018   SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2019   return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2020 }
2021
2022 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
2023                                       MachineBasicBlock::iterator I,
2024                                       MachineBasicBlock *TargetBB,
2025                                       const DebugLoc &DL) const {
2026   MachineFunction *MF = MBB.getParent();
2027   MachineRegisterInfo &MRI = MF->getRegInfo();
2028   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2029
2030   const TargetRegisterClass *RC = &VE::I64RegClass;
2031   Register Tmp1 = MRI.createVirtualRegister(RC);
2032   Register Tmp2 = MRI.createVirtualRegister(RC);
2033   Register Result = MRI.createVirtualRegister(RC);
2034
2035   if (isPositionIndependent()) {
2036     // Create following instructions for local linkage PIC code.
2037     //     lea %Tmp1, TargetBB@gotoff_lo
2038     //     and %Tmp2, %Tmp1, (32)0
2039     //     lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2040     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2041         .addImm(0)
2042         .addImm(0)
2043         .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
2044     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2045         .addReg(Tmp1, getKillRegState(true))
2046         .addImm(M0(32));
2047     BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2048         .addReg(VE::SX15)
2049         .addReg(Tmp2, getKillRegState(true))
2050         .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
2051   } else {
2052     // Create following instructions for non-PIC code.
2053     //     lea     %Tmp1, TargetBB@lo
2054     //     and     %Tmp2, %Tmp1, (32)0
2055     //     lea.sl  %Result, TargetBB@hi(%Tmp2)
2056     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2057         .addImm(0)
2058         .addImm(0)
2059         .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2060     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2061         .addReg(Tmp1, getKillRegState(true))
2062         .addImm(M0(32));
2063     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2064         .addReg(Tmp2, getKillRegState(true))
2065         .addImm(0)
2066         .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2067   }
2068   return Result;
2069 }
2070
2071 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
2072                                          MachineBasicBlock::iterator I,
2073                                          StringRef Symbol, const DebugLoc &DL,
2074                                          bool IsLocal = false,
2075                                          bool IsCall = false) const {
2076   MachineFunction *MF = MBB.getParent();
2077   MachineRegisterInfo &MRI = MF->getRegInfo();
2078   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2079
2080   const TargetRegisterClass *RC = &VE::I64RegClass;
2081   Register Result = MRI.createVirtualRegister(RC);
2082
2083   if (isPositionIndependent()) {
2084     if (IsCall && !IsLocal) {
2085       // Create following instructions for non-local linkage PIC code function
2086       // calls.  These instructions uses IC and magic number -24, so we expand
2087       // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2088       //     lea %Reg, Symbol@plt_lo(-24)
2089       //     and %Reg, %Reg, (32)0
2090       //     sic %s16
2091       //     lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
2092       BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2093           .addExternalSymbol("abort");
2094     } else if (IsLocal) {
2095       Register Tmp1 = MRI.createVirtualRegister(RC);
2096       Register Tmp2 = MRI.createVirtualRegister(RC);
2097       // Create following instructions for local linkage PIC code.
2098       //     lea %Tmp1, Symbol@gotoff_lo
2099       //     and %Tmp2, %Tmp1, (32)0
2100       //     lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2101       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2102           .addImm(0)
2103           .addImm(0)
2104           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
2105       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2106           .addReg(Tmp1, getKillRegState(true))
2107           .addImm(M0(32));
2108       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2109           .addReg(VE::SX15)
2110           .addReg(Tmp2, getKillRegState(true))
2111           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
2112     } else {
2113       Register Tmp1 = MRI.createVirtualRegister(RC);
2114       Register Tmp2 = MRI.createVirtualRegister(RC);
2115       // Create following instructions for not local linkage PIC code.
2116       //     lea %Tmp1, Symbol@got_lo
2117       //     and %Tmp2, %Tmp1, (32)0
2118       //     lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2119       //     ld %Result, 0(%Tmp3)
2120       Register Tmp3 = MRI.createVirtualRegister(RC);
2121       BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2122           .addImm(0)
2123           .addImm(0)
2124           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
2125       BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2126           .addReg(Tmp1, getKillRegState(true))
2127           .addImm(M0(32));
2128       BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2129           .addReg(VE::SX15)
2130           .addReg(Tmp2, getKillRegState(true))
2131           .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
2132       BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2133           .addReg(Tmp3, getKillRegState(true))
2134           .addImm(0)
2135           .addImm(0);
2136     }
2137   } else {
2138     Register Tmp1 = MRI.createVirtualRegister(RC);
2139     Register Tmp2 = MRI.createVirtualRegister(RC);
2140     // Create following instructions for non-PIC code.
2141     //     lea     %Tmp1, Symbol@lo
2142     //     and     %Tmp2, %Tmp1, (32)0
2143     //     lea.sl  %Result, Symbol@hi(%Tmp2)
2144     BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2145         .addImm(0)
2146         .addImm(0)
2147         .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2148     BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2149         .addReg(Tmp1, getKillRegState(true))
2150         .addImm(M0(32));
2151     BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2152         .addReg(Tmp2, getKillRegState(true))
2153         .addImm(0)
2154         .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2155   }
2156   return Result;
2157 }
2158
2159 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
2160                                               MachineBasicBlock *MBB,
2161                                               MachineBasicBlock *DispatchBB,
2162                                               int FI, int Offset) const {
2163   DebugLoc DL = MI.getDebugLoc();
2164   const VEInstrInfo *TII = Subtarget->getInstrInfo();
2165
2166   Register LabelReg =
2167       prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
2168
2169   // Store an address of DispatchBB to a given jmpbuf[1] where has next IC
2170   // referenced by longjmp (throw) later.
2171   MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2172   addFrameReference(MIB, FI, Offset); // jmpbuf[1]
2173   MIB.addReg(LabelReg, getKillRegState(true));
2174 }
2175
/// Expand the EH_SjLj_SetJmp pseudo instruction \p MI found in \p MBB.
///
/// The containing block is split into four blocks: ThisMBB (the original
/// block up to \p MI), MainMBB, SinkMBB (which receives every instruction
/// after \p MI together with the original successor edges), and RestoreMBB
/// (appended to the end of the function and marked address-taken).  The
/// destination register of \p MI becomes a PHI at the top of SinkMBB that
/// yields 0 when control arrives from MainMBB and 1 when it arrives from
/// RestoreMBB.
///
/// \param MI  The pseudo instruction.  Operand 0 is the i32 result register
///            and operand 1 is the register holding the jmpbuf address.  The
///            instruction is erased before returning.
/// \param MBB The block that contains \p MI.
/// \returns SinkMBB, the block that now holds the code following \p MI.
MachineBasicBlock *
VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                   MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget->getInstrInfo();
  const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  // Insertion point for the new blocks: right after the current block.
  MachineFunction::iterator I = ++MBB->getIterator();

  // Memory Reference.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());
  Register BufReg = MI.getOperand(1).getReg();

  Register DstReg;

  DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  // TRI is only referenced by the assert above.
  (void)TRI;
  // One result register per incoming edge of the PHI built in SinkMBB.
  Register MainDestReg = MRI.createVirtualRegister(RC);
  Register RestoreDestReg = MRI.createVirtualRegister(RC);

  // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
  // instructions.  SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
  //
  // ThisMBB:
  //   buf[3] = %s17 iff %s17 is used as BP
  //   buf[1] = RestoreMBB as IC after longjmp
  //   # SjLjSetup RestoreMBB
  //
  // MainMBB:
  //   v_main = 0
  //
  // SinkMBB:
  //   v = phi(v_main, MainMBB, v_restore, RestoreMBB)
  //   ...
  //
  // RestoreMBB:
  //   %s17 = buf[3] iff %s17 is used as BP
  //   v_restore = 1
  //   goto SinkMBB

  MachineBasicBlock *ThisMBB = MBB;
  MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, MainMBB);
  MF->insert(I, SinkMBB);
  // RestoreMBB goes to the very end of the function; its address is stored in
  // the jmpbuf below, so it is marked as address-taken.
  MF->push_back(RestoreMBB);
  RestoreMBB->setMachineBlockAddressTaken();

  // Transfer the remainder of BB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // ThisMBB:
  // Materialize the address of RestoreMBB in a register.
  Register LabelReg =
      prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);

  // Store BP in buf[3] iff this function is using BP.
  const VEFrameLowering *TFI = Subtarget->getFrameLowering();
  if (TFI->hasBP(*MF)) {
    MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
    MIB.addReg(BufReg);
    MIB.addImm(0);
    MIB.addImm(24); // byte offset of buf[3]
    MIB.addReg(VE::SX17);
    MIB.setMemRefs(MMOs);
  }

  // Store IP in buf[1].
  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
  MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
  MIB.addImm(0);
  MIB.addImm(8); // byte offset of buf[1]
  MIB.addReg(LabelReg, getKillRegState(true));
  MIB.setMemRefs(MMOs);

  // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.

  // Insert setup.
  MIB =
      BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);

  // Attach a register mask that preserves no registers to the setup
  // instruction.
  const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
  MIB.addRegMask(RegInfo->getNoPreservedMask());
  ThisMBB->addSuccessor(MainMBB);
  ThisMBB->addSuccessor(RestoreMBB);

  // MainMBB:
  // v_main = 0
  BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
      .addImm(0)
      .addImm(0)
      .addImm(0);
  MainMBB->addSuccessor(SinkMBB);

  // SinkMBB:
  // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
  BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
      .addReg(MainDestReg)
      .addMBB(MainMBB)
      .addReg(RestoreDestReg)
      .addMBB(RestoreMBB);

  // RestoreMBB:
  // Restore BP from buf[3] iff this function is using BP.  The address of
  // buf is in SX10.
  // FIXME: Better to not use SX10 here
  if (TFI->hasBP(*MF)) {
    MachineInstrBuilder MIB =
        BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
    MIB.addReg(VE::SX10);
    MIB.addImm(0);
    MIB.addImm(24); // byte offset of buf[3]
    MIB.setMemRefs(MMOs);
  }
  // v_restore = 1
  BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
      .addImm(0)
      .addImm(0)
      .addImm(1);
  // goto SinkMBB
  BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
  RestoreMBB->addSuccessor(SinkMBB);

  // The pseudo instruction has been fully expanded.
  MI.eraseFromParent();
  return SinkMBB;
}
2306
2307 MachineBasicBlock *
2308 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2309                                     MachineBasicBlock *MBB) const {
2310   DebugLoc DL = MI.getDebugLoc();
2311   MachineFunction *MF = MBB->getParent();
2312   const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2313   MachineRegisterInfo &MRI = MF->getRegInfo();
2314
2315   // Memory Reference.
2316   SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2317                                            MI.memoperands_end());
2318   Register BufReg = MI.getOperand(0).getReg();
2319
2320   Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2321   // Since FP is only updated here but NOT referenced, it's treated as GPR.
2322   Register FP = VE::SX9;
2323   Register SP = VE::SX11;
2324
2325   MachineInstrBuilder MIB;
2326
2327   MachineBasicBlock *ThisMBB = MBB;
2328
2329   // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2330   //
2331   // ThisMBB:
2332   //   %fp = load buf[0]
2333   //   %jmp = load buf[1]
2334   //   %s10 = buf        ; Store an address of buf to SX10 for RestoreMBB
2335   //   %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2336   //   jmp %jmp
2337
2338   // Reload FP.
2339   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2340   MIB.addReg(BufReg);
2341   MIB.addImm(0);
2342   MIB.addImm(0);
2343   MIB.setMemRefs(MMOs);
2344
2345   // Reload IP.
2346   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2347   MIB.addReg(BufReg);
2348   MIB.addImm(0);
2349   MIB.addImm(8);
2350   MIB.setMemRefs(MMOs);
2351
2352   // Copy BufReg to SX10 for later use in setjmp.
2353   // FIXME: Better to not use SX10 here
2354   BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2355       .addReg(BufReg)
2356       .addImm(0);
2357
2358   // Reload SP.
2359   MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2360   MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2361   MIB.addImm(0);
2362   MIB.addImm(16);
2363   MIB.setMemRefs(MMOs);
2364
2365   // Jump.
2366   BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2367       .addReg(Tmp, getKillRegState(true))
2368       .addImm(0);
2369
2370   MI.eraseFromParent();
2371   return ThisMBB;
2372 }
2373
/// Expand the EH_SjLj_Setup_Dispatch pseudo instruction \p MI.
///
/// This builds the SjLj exception dispatch machinery for the current function:
///  - collects every landing pad keyed by its call site number,
///  - appends three new blocks (DispatchBB, DispContBB, TrapBB) to the
///    function,
///  - stores the address of DispatchBB into the function context right before
///    \p MI (see setupEntryBlockForSjLj),
///  - emits a jump-table based dispatch on the call site value loaded from the
///    function context, and
///  - redirects every invoke block from its landing pad(s) to DispatchBB,
///    after which the former landing pads are no longer marked as EH pads.
///
/// \param MI The pseudo instruction; erased before returning.
/// \param BB The block containing \p MI.
/// \returns \p BB.
MachineBasicBlock *
VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = BB->getParent();
  MachineFrameInfo &MFI = MF->getFrameInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  const VEInstrInfo *TII = Subtarget->getInstrInfo();
  // Frame index of the function context object.
  int FI = MFI.getFunctionContextIndex();

  // Get a mapping of the call site numbers to all of the landing pads they're
  // associated with.
  DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  for (auto &MBB : *MF) {
    if (!MBB.isEHPad())
      continue;

    // The landing pad's symbol is taken from the first non-debug instruction,
    // which is expected to be an EH_LABEL.
    MCSymbol *Sym = nullptr;
    for (const auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;

      assert(MI.isEHLabel() && "expected EH_LABEL");
      Sym = MI.getOperand(0).getMCSymbol();
      break;
    }

    if (!MF->hasCallSiteLandingPad(Sym))
      continue;

    for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
      CallSiteNumToLPad[CSI].push_back(&MBB);
      MaxCSNum = std::max(MaxCSNum, CSI);
    }
  }

  // Get an ordered list of the machine basic blocks for the jump table.
  std::vector<MachineBasicBlock *> LPadList;
  SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());

  // Walk call site numbers in increasing order starting at 1, so LPadList is
  // ordered by call site number.  The predecessors of each landing pad are
  // remembered as the invoke blocks.
  for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
    for (auto &LP : CallSiteNumToLPad[CSI]) {
      LPadList.push_back(LP);
      InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // The %fn_context is allocated like below (from --print-after=sjljehprepare):
  //   %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
  //
  // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
  // First `i64` is callsite, so callsite is FI+8.
  static const int OffsetIC = 72;
  static const int OffsetCS = 8;

  // Create the MBBs for the dispatch code like following:
  //
  // ThisMBB:
  //   Prepare DispatchBB address and store it to buf[1].
  //   ...
  //
  // DispatchBB:
  //   %s15 = GETGOT iff isPositionIndependent
  //   %callsite = load callsite
  //   branch to TrapBB with condition VECC::CC_ILE on
  //     (#size of callsites, %callsite)
  //
  // DispContBB:
  //   %breg = address of jump table
  //   %pc = load and calculate next pc from %breg and %callsite
  //   jmp %pc
  //
  // TrapBB:
  //   Call abort.

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsEHPad(true);

  // TrapBB causes a trap like `assert(0)`.
  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert MBBs.  DispContBB is laid out directly after DispatchBB.
  MF->push_back(DispatchBB);
  MF->push_back(DispContBB);
  MF->push_back(TrapBB);

  // Insert code to call abort in the TrapBB.
  Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
                                 /* Local */ false, /* Call */ true);
  BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
      .addReg(Abort, getKillRegState(true))
      .addImm(0)
      .addImm(0);

  // Insert code into the entry block that creates and registers the function
  // context.
  setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);

  // Create the jump table and associated information
  unsigned JTE = getJumpTableEncoding();
  MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);

  const VERegisterInfo &RI = TII->getRegisterInfo();
  // Add a register mask with no preserved registers.  This results in all
  // registers being marked as clobbered.
  BuildMI(DispatchBB, DL, TII->get(VE::NOP))
      .addRegMask(RI.getNoPreservedMask());

  if (isPositionIndependent()) {
    // Force to generate GETGOT, since current implementation doesn't store GOT
    // register.
    BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
  }

  // IReg is used as an index in a memory operand and therefore can't be SP
  const TargetRegisterClass *RC = &VE::I64RegClass;
  Register IReg = MRI.createVirtualRegister(RC);
  // %callsite = load callsite (function context, offset OffsetCS)
  addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
                    OffsetCS);
  // Range check of the call site value against the number of landing pads;
  // branches to TrapBB.
  // NOTE(review): the `< 64` threshold presumably reflects the range of the
  // immediate operand accepted by BRCFLir_t -- confirm against the
  // instruction definition.
  if (LPadList.size() < 64) {
    BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
        .addImm(VECC::CC_ILE)
        .addImm(LPadList.size())
        .addReg(IReg)
        .addMBB(TrapBB);
  } else {
    // Otherwise the landing pad count is materialized in a register first.
    assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
    Register TmpReg = MRI.createVirtualRegister(RC);
    BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
        .addImm(0)
        .addImm(0)
        .addImm(LPadList.size());
    BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
        .addImm(VECC::CC_ILE)
        .addReg(TmpReg, getKillRegState(true))
        .addReg(IReg)
        .addMBB(TrapBB);
  }

  // BReg receives the address of the jump table; Tmp1/Tmp2 are scratch
  // registers for computing it.
  Register BReg = MRI.createVirtualRegister(RC);
  Register Tmp1 = MRI.createVirtualRegister(RC);
  Register Tmp2 = MRI.createVirtualRegister(RC);

  if (isPositionIndependent()) {
    // Create following instructions for local linkage PIC code.
    //     lea    %Tmp1, .LJTI0_0@gotoff_lo
    //     and    %Tmp2, %Tmp1, (32)0
    //     lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
    BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
    BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
        .addReg(VE::SX15)
        .addReg(Tmp2, getKillRegState(true))
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
  } else {
    // Create following instructions for non-PIC code.
    //     lea     %Tmp1, .LJTI0_0@lo
    //     and     %Tmp2, %Tmp1, (32)0
    //     lea.sl  %BReg, .LJTI0_0@hi(%Tmp2)
    BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
        .addImm(0)
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
    BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
        .addReg(Tmp1, getKillRegState(true))
        .addImm(M0(32));
    BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
        .addReg(Tmp2, getKillRegState(true))
        .addImm(0)
        .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
  }

  // Load the jump table entry selected by the call site value and jump to it.
  switch (JTE) {
  case MachineJumpTableInfo::EK_BlockAddress: {
    // Generate simple block address code for no-PIC model.
    //     sll %Tmp1, %IReg, 3
    //     lds %TReg, 0(%Tmp1, %BReg)
    //     bcfla %TReg

    Register TReg = MRI.createVirtualRegister(RC);
    // Note: this Tmp1 shadows the outer Tmp1 used for the table address.
    Register Tmp1 = MRI.createVirtualRegister(RC);

    BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
        .addReg(IReg, getKillRegState(true))
        .addImm(3);
    BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
        .addReg(BReg, getKillRegState(true))
        .addReg(Tmp1, getKillRegState(true))
        .addImm(0);
    BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
        .addReg(TReg, getKillRegState(true))
        .addImm(0);
    break;
  }
  case MachineJumpTableInfo::EK_Custom32: {
    // Generate block address code using differences from the function pointer
    // for PIC model.
    //     sll %Tmp1, %IReg, 2
    //     ldl.zx %OReg, 0(%Tmp1, %BReg)
    //     Prepare function address in BReg2.
    //     adds.l %TReg, %BReg2, %OReg
    //     bcfla %TReg

    assert(isPositionIndependent());
    Register OReg = MRI.createVirtualRegister(RC);
    Register TReg = MRI.createVirtualRegister(RC);
    // Note: this Tmp1 shadows the outer Tmp1 used for the table address.
    Register Tmp1 = MRI.createVirtualRegister(RC);

    BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
        .addReg(IReg, getKillRegState(true))
        .addImm(2);
    BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
        .addReg(BReg, getKillRegState(true))
        .addReg(Tmp1, getKillRegState(true))
        .addImm(0);
    Register BReg2 =
        prepareSymbol(*DispContBB, DispContBB->end(),
                      DispContBB->getParent()->getName(), DL, /* Local */ true);
    BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
        .addReg(OReg, getKillRegState(true))
        .addReg(BReg2, getKillRegState(true));
    BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
        .addReg(TReg, getKillRegState(true))
        .addImm(0);
    break;
  }
  default:
    llvm_unreachable("Unexpected jump table encoding");
  }

  // Add the jump table entries as successors to the MBB.
  // A landing pad may appear in LPadList more than once; add each only once.
  SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
  for (auto &LP : LPadList)
    if (SeenMBBs.insert(LP).second)
      DispContBB->addSuccessor(LP);

  // N.B. the order the invoke BBs are processed in doesn't matter here.
  SmallVector<MachineBasicBlock *, 64> MBBLPads;
  const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (MachineBasicBlock *MBB : InvokeBBs) {
    // Remove the landing pad successor from the invoke block and replace it
    // with the new dispatch block.
    // Keep a copy of Successors since it's modified inside the loop.
    SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
                                                   MBB->succ_rend());
    // FIXME: Avoid quadratic complexity.
    for (auto *MBBS : Successors) {
      if (MBBS->isEHPad()) {
        MBB->removeSuccessor(MBBS);
        MBBLPads.push_back(MBBS);
      }
    }

    MBB->addSuccessor(DispatchBB);

    // Find the invoke call and mark all of the callee-saved registers as
    // 'implicit defined' so that they're spilled.  This prevents code from
    // moving instructions to before the EH block, where they will never be
    // executed.
    // Only the last call instruction of the block is processed (note the
    // reverse iteration and the `break` at the end of the loop body).
    for (auto &II : reverse(*MBB)) {
      if (!II.isCall())
        continue;

      // Registers that already appear as operands of the call.
      DenseMap<Register, bool> DefRegs;
      for (auto &MOp : II.operands())
        if (MOp.isReg())
          DefRegs[MOp.getReg()] = true;

      // SavedRegs is iterated until a zero entry is reached.
      MachineInstrBuilder MIB(*MF, &II);
      for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
        Register Reg = SavedRegs[RI];
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      break;
    }
  }

  // Mark all former landing pads as non-landing pads.  The dispatch is the only
  // landing pad now.
  for (auto &LP : MBBLPads)
    LP->setIsEHPad(false);

  // The instruction is gone now.
  MI.eraseFromParent();
  return BB;
}
2677
2678 MachineBasicBlock *
2679 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2680                                               MachineBasicBlock *BB) const {
2681   switch (MI.getOpcode()) {
2682   default:
2683     llvm_unreachable("Unknown Custom Instruction!");
2684   case VE::EH_SjLj_LongJmp:
2685     return emitEHSjLjLongJmp(MI, BB);
2686   case VE::EH_SjLj_SetJmp:
2687     return emitEHSjLjSetJmp(MI, BB);
2688   case VE::EH_SjLj_Setup_Dispatch:
2689     return emitSjLjDispatchBlock(MI, BB);
2690   }
2691 }
2692
2693 static bool isSimm7(SDValue V) {
2694   EVT VT = V.getValueType();
2695   if (VT.isVector())
2696     return false;
2697
2698   if (VT.isInteger()) {
2699     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2700       return isInt<7>(C->getSExtValue());
2701   } else if (VT.isFloatingPoint()) {
2702     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2703       if (VT == MVT::f32 || VT == MVT::f64) {
2704         const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2705         uint64_t Val = Imm.getSExtValue();
2706         if (Imm.getBitWidth() == 32)
2707           Val <<= 32; // Immediate value of float place at higher bits on VE.
2708         return isInt<7>(Val);
2709       }
2710     }
2711   }
2712   return false;
2713 }
2714
2715 static bool isMImm(SDValue V) {
2716   EVT VT = V.getValueType();
2717   if (VT.isVector())
2718     return false;
2719
2720   if (VT.isInteger()) {
2721     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2722       return isMImmVal(getImmVal(C));
2723   } else if (VT.isFloatingPoint()) {
2724     if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2725       if (VT == MVT::f32) {
2726         // Float value places at higher bits, so ignore lower 32 bits.
2727         return isMImm32Val(getFpImmVal(C) >> 32);
2728       } else if (VT == MVT::f64) {
2729         return isMImmVal(getFpImmVal(C));
2730       }
2731     }
2732   }
2733   return false;
2734 }
2735
2736 static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2737   if (SrcVT.isFloatingPoint()) {
2738     if (SrcVT == MVT::f128)
2739       return VEISD::CMPQ;
2740     return VEISD::CMPF;
2741   }
2742   return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2743 }
2744
2745 static EVT decideCompType(EVT SrcVT) {
2746   if (SrcVT == MVT::f128)
2747     return MVT::f64;
2748   return SrcVT;
2749 }
2750
2751 static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC,
2752                                     bool WithCMov) {
2753   if (SrcVT.isFloatingPoint()) {
2754     // For the case of floating point setcc, only unordered comparison
2755     // or general comparison with -enable-no-nans-fp-math option reach
2756     // here, so it is safe even if values are NaN.  Only f128 doesn't
2757     // safe since VE uses f64 result of f128 comparison.
2758     return SrcVT != MVT::f128;
2759   }
2760   if (isIntEqualitySetCC(CC)) {
2761     // For the case of equal or not equal, it is safe without comparison with 0.
2762     return true;
2763   }
2764   if (WithCMov) {
2765     // For the case of integer setcc with cmov, all signed comparison with 0
2766     // are safe.
2767     return isSignedIntSetCC(CC);
2768   }
2769   // For the case of integer setcc, only signed 64 bits comparison is safe.
2770   // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2771   // less than 0 witout CMPU.  For 32 bits, other half of 32 bits are
2772   // uncoditional, so it is not safe too without CMPI..
2773   return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2774 }
2775
2776 static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS,
2777                                   ISD::CondCode CC, bool WithCMov,
2778                                   const SDLoc &DL, SelectionDAG &DAG) {
2779   // Compare values.  If RHS is 0 and it is safe to calculate without
2780   // comparison, we don't generate an instruction for comparison.
2781   EVT CompVT = decideCompType(VT);
2782   if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2783       (isNullConstant(RHS) || isNullFPConstant(RHS))) {
2784     return LHS;
2785   }
2786   return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2787 }
2788
2789 SDValue VETargetLowering::combineSelect(SDNode *N,
2790                                         DAGCombinerInfo &DCI) const {
2791   assert(N->getOpcode() == ISD::SELECT &&
2792          "Should be called with a SELECT node");
2793   ISD::CondCode CC = ISD::CondCode::SETNE;
2794   SDValue Cond = N->getOperand(0);
2795   SDValue True = N->getOperand(1);
2796   SDValue False = N->getOperand(2);
2797
2798   // We handle only scalar SELECT.
2799   EVT VT = N->getValueType(0);
2800   if (VT.isVector())
2801     return SDValue();
2802
2803   // Peform combineSelect after leagalize DAG.
2804   if (!DCI.isAfterLegalizeDAG())
2805     return SDValue();
2806
2807   EVT VT0 = Cond.getValueType();
2808   if (isMImm(True)) {
2809     // VE's condition move can handle MImm in True clause, so nothing to do.
2810   } else if (isMImm(False)) {
2811     // VE's condition move can handle MImm in True clause, so swap True and
2812     // False clauses if False has MImm value.  And, update condition code.
2813     std::swap(True, False);
2814     CC = getSetCCInverse(CC, VT0);
2815   }
2816
2817   SDLoc DL(N);
2818   SelectionDAG &DAG = DCI.DAG;
2819   VECC::CondCode VECCVal;
2820   if (VT0.isFloatingPoint()) {
2821     VECCVal = fpCondCode2Fcc(CC);
2822   } else {
2823     VECCVal = intCondCode2Icc(CC);
2824   }
2825   SDValue Ops[] = {Cond, True, False,
2826                    DAG.getConstant(VECCVal, DL, MVT::i32)};
2827   return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2828 }
2829
2830 SDValue VETargetLowering::combineSelectCC(SDNode *N,
2831                                           DAGCombinerInfo &DCI) const {
2832   assert(N->getOpcode() == ISD::SELECT_CC &&
2833          "Should be called with a SELECT_CC node");
2834   ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2835   SDValue LHS = N->getOperand(0);
2836   SDValue RHS = N->getOperand(1);
2837   SDValue True = N->getOperand(2);
2838   SDValue False = N->getOperand(3);
2839
2840   // We handle only scalar SELECT_CC.
2841   EVT VT = N->getValueType(0);
2842   if (VT.isVector())
2843     return SDValue();
2844
2845   // Peform combineSelectCC after leagalize DAG.
2846   if (!DCI.isAfterLegalizeDAG())
2847     return SDValue();
2848
2849   // We handle only i32/i64/f32/f64/f128 comparisons.
2850   EVT LHSVT = LHS.getValueType();
2851   assert(LHSVT == RHS.getValueType());
2852   switch (LHSVT.getSimpleVT().SimpleTy) {
2853   case MVT::i32:
2854   case MVT::i64:
2855   case MVT::f32:
2856   case MVT::f64:
2857   case MVT::f128:
2858     break;
2859   default:
2860     // Return SDValue to let llvm handle other types.
2861     return SDValue();
2862   }
2863
2864   if (isMImm(RHS)) {
2865     // VE's comparison can handle MImm in RHS, so nothing to do.
2866   } else if (isSimm7(RHS)) {
2867     // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2868     // update condition code.
2869     std::swap(LHS, RHS);
2870     CC = getSetCCSwappedOperands(CC);
2871   }
2872   if (isMImm(True)) {
2873     // VE's condition move can handle MImm in True clause, so nothing to do.
2874   } else if (isMImm(False)) {
2875     // VE's condition move can handle MImm in True clause, so swap True and
2876     // False clauses if False has MImm value.  And, update condition code.
2877     std::swap(True, False);
2878     CC = getSetCCInverse(CC, LHSVT);
2879   }
2880
2881   SDLoc DL(N);
2882   SelectionDAG &DAG = DCI.DAG;
2883
2884   bool WithCMov = true;
2885   SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2886
2887   VECC::CondCode VECCVal;
2888   if (LHSVT.isFloatingPoint()) {
2889     VECCVal = fpCondCode2Fcc(CC);
2890   } else {
2891     VECCVal = intCondCode2Icc(CC);
2892   }
2893   SDValue Ops[] = {CompNode, True, False,
2894                    DAG.getConstant(VECCVal, DL, MVT::i32)};
2895   return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2896 }
2897
2898 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
2899 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2900   switch (User->getOpcode()) {
2901   default:
2902     return false;
2903   case ISD::ADD:
2904   case ISD::SUB:
2905   case ISD::MUL:
2906   case ISD::SDIV:
2907   case ISD::UDIV:
2908   case ISD::SETCC:
2909   case ISD::SMIN:
2910   case ISD::SMAX:
2911   case ISD::SHL:
2912   case ISD::SRA:
2913   case ISD::BSWAP:
2914   case ISD::SINT_TO_FP:
2915   case ISD::UINT_TO_FP:
2916   case ISD::BR_CC:
2917   case ISD::BITCAST:
2918   case ISD::ATOMIC_CMP_SWAP:
2919   case ISD::ATOMIC_SWAP:
2920   case VEISD::CMPU:
2921   case VEISD::CMPI:
2922     return true;
2923   case ISD::SRL:
2924     if (N->getOperand(0).getOpcode() != ISD::SRL)
2925       return true;
2926     // (srl (trunc (srl ...))) may be optimized by combining srl, so
2927     // doesn't optimize trunc now.
2928     return false;
2929   case ISD::SELECT_CC:
2930     if (User->getOperand(2).getNode() != N &&
2931         User->getOperand(3).getNode() != N)
2932       return true;
2933     return isI32InsnAllUses(User, N);
2934   case VEISD::CMOV:
2935     // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2936     // However, trunc in true or false clauses is not safe.
2937     if (User->getOperand(1).getNode() != N &&
2938         User->getOperand(2).getNode() != N &&
2939         isa<ConstantSDNode>(User->getOperand(3))) {
2940       VECC::CondCode VECCVal =
2941           static_cast<VECC::CondCode>(User->getConstantOperandVal(3));
2942       return isIntVECondCode(VECCVal);
2943     }
2944     [[fallthrough]];
2945   case ISD::AND:
2946   case ISD::OR:
2947   case ISD::XOR:
2948   case ISD::SELECT:
2949   case ISD::CopyToReg:
2950     // Check all use of selections, bit operations, and copies.  If all of them
2951     // are safe, optimize truncate to extract_subreg.
2952     return isI32InsnAllUses(User, N);
2953   }
2954 }
2955
2956 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2957   // Check all use of User node.  If all of them are safe, optimize
2958   // truncate to extract_subreg.
2959   for (const SDNode *U : User->uses()) {
2960     switch (U->getOpcode()) {
2961     default:
2962       // If the use is an instruction which treats the source operand as i32,
2963       // it is safe to avoid truncate here.
2964       if (isI32Insn(U, N))
2965         continue;
2966       break;
2967     case ISD::ANY_EXTEND:
2968     case ISD::SIGN_EXTEND:
2969     case ISD::ZERO_EXTEND: {
2970       // Special optimizations to the combination of ext and trunc.
2971       // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2972       // since this truncate instruction clears higher 32 bits which is filled
2973       // by one of ext instructions later.
2974       assert(N->getValueType(0) == MVT::i32 &&
2975              "find truncate to not i32 integer");
2976       if (User->getOpcode() == ISD::SELECT_CC ||
2977           User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2978         continue;
2979       break;
2980     }
2981     }
2982     return false;
2983   }
2984   return true;
2985 }
2986
2987 // Optimize TRUNCATE in DAG combining.  Optimizing it in CUSTOM lower is
2988 // sometime too early.  Optimizing it in DAG pattern matching in VEInstrInfo.td
2989 // is sometime too late.  So, doing it at here.
2990 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2991                                           DAGCombinerInfo &DCI) const {
2992   assert(N->getOpcode() == ISD::TRUNCATE &&
2993          "Should be called with a TRUNCATE node");
2994
2995   SelectionDAG &DAG = DCI.DAG;
2996   SDLoc DL(N);
2997   EVT VT = N->getValueType(0);
2998
2999   // We prefer to do this when all types are legal.
3000   if (!DCI.isAfterLegalizeDAG())
3001     return SDValue();
3002
3003   // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
3004   if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3005       isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3006       isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3007     return SDValue();
3008
3009   // Check all use of this TRUNCATE.
3010   for (const SDNode *User : N->uses()) {
3011     // Make sure that we're not going to replace TRUNCATE for non i32
3012     // instructions.
3013     //
3014     // FIXME: Although we could sometimes handle this, and it does occur in
3015     // practice that one of the condition inputs to the select is also one of
3016     // the outputs, we currently can't deal with this.
3017     if (isI32Insn(User, N))
3018       continue;
3019
3020     return SDValue();
3021   }
3022
3023   SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3024   return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3025                                     N->getOperand(0), SubI32),
3026                  0);
3027 }
3028
3029 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
3030                                             DAGCombinerInfo &DCI) const {
3031   switch (N->getOpcode()) {
3032   default:
3033     break;
3034   case ISD::SELECT:
3035     return combineSelect(N, DCI);
3036   case ISD::SELECT_CC:
3037     return combineSelectCC(N, DCI);
3038   case ISD::TRUNCATE:
3039     return combineTRUNCATE(N, DCI);
3040   }
3041
3042   return SDValue();
3043 }
3044
3045 //===----------------------------------------------------------------------===//
3046 // VE Inline Assembly Support
3047 //===----------------------------------------------------------------------===//
3048
3049 VETargetLowering::ConstraintType
3050 VETargetLowering::getConstraintType(StringRef Constraint) const {
3051   if (Constraint.size() == 1) {
3052     switch (Constraint[0]) {
3053     default:
3054       break;
3055     case 'v': // vector registers
3056       return C_RegisterClass;
3057     }
3058   }
3059   return TargetLowering::getConstraintType(Constraint);
3060 }
3061
3062 std::pair<unsigned, const TargetRegisterClass *>
3063 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
3064                                                StringRef Constraint,
3065                                                MVT VT) const {
3066   const TargetRegisterClass *RC = nullptr;
3067   if (Constraint.size() == 1) {
3068     switch (Constraint[0]) {
3069     default:
3070       return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3071     case 'r':
3072       RC = &VE::I64RegClass;
3073       break;
3074     case 'v':
3075       RC = &VE::V64RegClass;
3076       break;
3077     }
3078     return std::make_pair(0U, RC);
3079   }
3080
3081   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3082 }
3083
3084 //===----------------------------------------------------------------------===//
3085 // VE Target Optimization Support
3086 //===----------------------------------------------------------------------===//
3087
3088 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
3089   // Specify 8 for PIC model to relieve the impact of PIC load instructions.
3090   if (isJumpTableRelative())
3091     return 8;
3092
3093   return TargetLowering::getMinimumJumpTableEntries();
3094 }
3095
3096 bool VETargetLowering::hasAndNot(SDValue Y) const {
3097   EVT VT = Y.getValueType();
3098
3099   // VE doesn't have vector and not instruction.
3100   if (VT.isVector())
3101     return false;
3102
3103   // VE allows different immediate values for X and Y where ~X & Y.
3104   // Only simm7 works for X, and only mimm works for Y on VE.  However, this
3105   // function is used to check whether an immediate value is OK for and-not
3106   // instruction as both X and Y.  Generating additional instruction to
3107   // retrieve an immediate value is no good since the purpose of this
3108   // function is to convert a series of 3 instructions to another series of
3109   // 3 instructions with better parallelism.  Therefore, we return false
3110   // for all immediate values now.
3111   // FIXME: Change hasAndNot function to have two operands to make it work
3112   //        correctly with Aurora VE.
3113   if (isa<ConstantSDNode>(Y))
3114     return false;
3115
3116   // It's ok for generic registers.
3117   return true;
3118 }
3119
3120 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
3121                                                   SelectionDAG &DAG) const {
3122   assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3123   MVT VT = Op.getOperand(0).getSimpleValueType();
3124
3125   // Special treatment for packed V64 types.
3126   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3127   (void)VT;
3128   // Example of codes:
3129   //   %packed_v = extractelt %vr, %idx / 2
3130   //   %v = %packed_v >> (%idx % 2 * 32)
3131   //   %res = %v & 0xffffffff
3132
3133   SDValue Vec = Op.getOperand(0);
3134   SDValue Idx = Op.getOperand(1);
3135   SDLoc DL(Op);
3136   SDValue Result = Op;
3137   if (false /* Idx->isConstant() */) {
3138     // TODO: optimized implementation using constant values
3139   } else {
3140     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3141     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3142     SDValue PackedElt =
3143         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3144     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3145     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3146     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3147     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3148     PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3149     SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3150     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3151     SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3152     Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3153                                         MVT::i32, PackedElt, SubI32),
3154                      0);
3155
3156     if (Op.getSimpleValueType() == MVT::f32) {
3157       Result = DAG.getBitcast(MVT::f32, Result);
3158     } else {
3159       assert(Op.getSimpleValueType() == MVT::i32);
3160     }
3161   }
3162   return Result;
3163 }
3164
3165 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
3166                                                  SelectionDAG &DAG) const {
3167   assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3168   MVT VT = Op.getOperand(0).getSimpleValueType();
3169
3170   // Special treatment for packed V64 types.
3171   assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3172   (void)VT;
3173   // The v512i32 and v512f32 starts from upper bits (0..31).  This "upper
3174   // bits" required `val << 32` from C implementation's point of view.
3175   //
3176   // Example of codes:
3177   //   %packed_elt = extractelt %vr, (%idx >> 1)
3178   //   %shift = ((%idx & 1) ^ 1) << 5
3179   //   %packed_elt &= 0xffffffff00000000 >> shift
3180   //   %packed_elt |= (zext %val) << shift
3181   //   %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3182
3183   SDLoc DL(Op);
3184   SDValue Vec = Op.getOperand(0);
3185   SDValue Val = Op.getOperand(1);
3186   SDValue Idx = Op.getOperand(2);
3187   if (Idx.getSimpleValueType() == MVT::i32)
3188     Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
3189   if (Val.getSimpleValueType() == MVT::f32)
3190     Val = DAG.getBitcast(MVT::i32, Val);
3191   assert(Val.getSimpleValueType() == MVT::i32);
3192   Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3193
3194   SDValue Result = Op;
3195   if (false /* Idx->isConstant()*/) {
3196     // TODO: optimized implementation using constant values
3197   } else {
3198     SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
3199     SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3200     SDValue PackedElt =
3201         SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3202     SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3203     SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3204     SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3205     Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
3206     SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3207     Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3208     PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
3209     Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3210     PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
3211     Result =
3212         SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3213                                    {HalfIdx, PackedElt, Vec}),
3214                 0);
3215   }
3216   return Result;
3217 }