1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the interfaces that VE uses to lower LLVM code into a
10 // selection DAG.
12 //===----------------------------------------------------------------------===//
14 #include "VEISelLowering.h"
15 #include "MCTargetDesc/VEMCExpr.h"
16 #include "VEInstrBuilder.h"
17 #include "VEMachineFunctionInfo.h"
18 #include "VERegisterInfo.h"
19 #include "VETargetMachine.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineJumpTableInfo.h"
26 #include "llvm/CodeGen/MachineModuleInfo.h"
27 #include "llvm/CodeGen/MachineRegisterInfo.h"
28 #include "llvm/CodeGen/SelectionDAG.h"
29 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
30 #include "llvm/IR/DerivedTypes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/IR/IRBuilder.h"
33 #include "llvm/IR/Module.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Support/KnownBits.h"
36 using namespace llvm;
38 #define DEBUG_TYPE "ve-lower"
40 //===----------------------------------------------------------------------===//
41 // Calling Convention Implementation
42 //===----------------------------------------------------------------------===//
44 #include "VEGenCallingConv.inc"
46 CCAssignFn *getReturnCC(CallingConv::ID CallConv) {
47 switch (CallConv) {
48 default:
49 return RetCC_VE_C;
50 case CallingConv::Fast:
51 return RetCC_VE_Fast;
55 CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
56 if (IsVarArg)
57 return CC_VE2;
58 switch (CallConv) {
59 default:
60 return CC_VE_C;
61 case CallingConv::Fast:
62 return CC_VE_Fast;
66 bool VETargetLowering::CanLowerReturn(
67 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
68 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
69 CCAssignFn *RetCC = getReturnCC(CallConv);
70 SmallVector<CCValAssign, 16> RVLocs;
71 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
72 return CCInfo.CheckReturn(Outs, RetCC);
75 static const MVT AllVectorVTs[] = {MVT::v256i32, MVT::v512i32, MVT::v256i64,
76 MVT::v256f32, MVT::v512f32, MVT::v256f64};
78 static const MVT AllPackedVTs[] = {MVT::v512i32, MVT::v512f32};
80 void VETargetLowering::initRegisterClasses() {
81 // Set up the register classes.
82 addRegisterClass(MVT::i32, &VE::I32RegClass);
83 addRegisterClass(MVT::i64, &VE::I64RegClass);
84 addRegisterClass(MVT::f32, &VE::F32RegClass);
85 addRegisterClass(MVT::f64, &VE::I64RegClass);
86 addRegisterClass(MVT::f128, &VE::F128RegClass);
88 if (Subtarget->enableVPU()) {
89 for (MVT VecVT : AllVectorVTs)
90 addRegisterClass(VecVT, &VE::V64RegClass);
91 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
92 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96 void VETargetLowering::initSPUActions() {
97 const auto &TM = getTargetMachine();
98 /// Load & Store {
100 // VE doesn't have i1 sign extending load.
101 for (MVT VT : MVT::integer_valuetypes()) {
102 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
103 setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
104 setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
105 setTruncStoreAction(VT, MVT::i1, Expand);
108 // VE doesn't have floating point extload/truncstore, so expand them.
109 for (MVT FPVT : MVT::fp_valuetypes()) {
110 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
111 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
112 setTruncStoreAction(FPVT, OtherFPVT, Expand);
116 // VE doesn't have fp128 load/store, so expand them in custom lower.
117 setOperationAction(ISD::LOAD, MVT::f128, Custom);
118 setOperationAction(ISD::STORE, MVT::f128, Custom);
120 /// } Load & Store
122 // Custom legalize address nodes into LO/HI parts.
123 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
124 setOperationAction(ISD::BlockAddress, PtrVT, Custom);
125 setOperationAction(ISD::GlobalAddress, PtrVT, Custom);
126 setOperationAction(ISD::GlobalTLSAddress, PtrVT, Custom);
127 setOperationAction(ISD::ConstantPool, PtrVT, Custom);
128 setOperationAction(ISD::JumpTable, PtrVT, Custom);
130 /// VAARG handling {
131 setOperationAction(ISD::VASTART, MVT::Other, Custom);
132 // VAARG needs to be lowered to an access with 8-byte alignment.
133 setOperationAction(ISD::VAARG, MVT::Other, Custom);
134 // Use the default implementation.
135 setOperationAction(ISD::VACOPY, MVT::Other, Expand);
136 setOperationAction(ISD::VAEND, MVT::Other, Expand);
137 /// } VAARG handling
139 /// Stack {
140 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
141 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
143 // Use the default implementation.
144 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
145 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
146 /// } Stack
148 /// Branch {
150 // VE doesn't have BRCOND
151 setOperationAction(ISD::BRCOND, MVT::Other, Expand);
153 // BR_JT is not implemented yet.
154 setOperationAction(ISD::BR_JT, MVT::Other, Expand);
156 /// } Branch
158 /// Int Ops {
159 for (MVT IntVT : {MVT::i32, MVT::i64}) {
160 // VE has no REM or DIVREM operations.
161 setOperationAction(ISD::UREM, IntVT, Expand);
162 setOperationAction(ISD::SREM, IntVT, Expand);
163 setOperationAction(ISD::SDIVREM, IntVT, Expand);
164 setOperationAction(ISD::UDIVREM, IntVT, Expand);
166 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
167 setOperationAction(ISD::SHL_PARTS, IntVT, Expand);
168 setOperationAction(ISD::SRA_PARTS, IntVT, Expand);
169 setOperationAction(ISD::SRL_PARTS, IntVT, Expand);
171 // VE has no MULHU/S or U/SMUL_LOHI operations.
172 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
173 setOperationAction(ISD::MULHU, IntVT, Expand);
174 setOperationAction(ISD::MULHS, IntVT, Expand);
175 setOperationAction(ISD::UMUL_LOHI, IntVT, Expand);
176 setOperationAction(ISD::SMUL_LOHI, IntVT, Expand);
178 // VE has no CTTZ, ROTL, ROTR operations.
179 setOperationAction(ISD::CTTZ, IntVT, Expand);
180 setOperationAction(ISD::ROTL, IntVT, Expand);
181 setOperationAction(ISD::ROTR, IntVT, Expand);
183 // VE has a 64-bit instruction which works as an i64 BSWAP operation. The
184 // same instruction also works as an i32 BSWAP operation with an additional
185 // parameter. Use isel patterns to lower BSWAP.
186 setOperationAction(ISD::BSWAP, IntVT, Legal);
188 // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
189 // operations. Use isel patterns for i64, promote for i32.
190 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
191 setOperationAction(ISD::BITREVERSE, IntVT, Act);
192 setOperationAction(ISD::CTLZ, IntVT, Act);
193 setOperationAction(ISD::CTLZ_ZERO_UNDEF, IntVT, Act);
194 setOperationAction(ISD::CTPOP, IntVT, Act);
196 // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
197 // Use isel patterns for i64, promote for i32.
198 setOperationAction(ISD::AND, IntVT, Act);
199 setOperationAction(ISD::OR, IntVT, Act);
200 setOperationAction(ISD::XOR, IntVT, Act);
202 /// } Int Ops
204 /// Conversion {
205 // VE doesn't have instructions for fp<->uint, so let LLVM expand them.
206 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); // use i64
207 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Promote); // use i64
208 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
209 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
211 // fp16 not supported
212 for (MVT FPVT : MVT::fp_valuetypes()) {
213 setOperationAction(ISD::FP16_TO_FP, FPVT, Expand);
214 setOperationAction(ISD::FP_TO_FP16, FPVT, Expand);
216 /// } Conversion
218 /// Floating-point Ops {
219 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
220 /// and fcmp.
222 // VE doesn't have the following floating-point operations.
223 for (MVT VT : MVT::fp_valuetypes()) {
224 setOperationAction(ISD::FNEG, VT, Expand);
225 setOperationAction(ISD::FREM, VT, Expand);
228 // VE doesn't have fdiv of f128.
229 setOperationAction(ISD::FDIV, MVT::f128, Expand);
231 for (MVT FPVT : {MVT::f32, MVT::f64}) {
232 // f32 and f64 uses ConstantFP. f128 uses ConstantPool.
233 setOperationAction(ISD::ConstantFP, FPVT, Legal);
235 /// } Floating-point Ops
237 /// Floating-point math functions {
239 // VE doesn't have the following floating-point math functions.
240 for (MVT VT : MVT::fp_valuetypes()) {
241 setOperationAction(ISD::FABS, VT, Expand);
242 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
243 setOperationAction(ISD::FCOS, VT, Expand);
244 setOperationAction(ISD::FSIN, VT, Expand);
245 setOperationAction(ISD::FSQRT, VT, Expand);
248 /// } Floating-point math functions
250 /// Atomic instructions {
252 setMaxAtomicSizeInBitsSupported(64);
253 setMinCmpXchgSizeInBits(32);
254 setSupportsUnalignedAtomics(false);
256 // Use custom inserter for ATOMIC_FENCE.
257 setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
259 // Other atomic instructions.
260 for (MVT VT : MVT::integer_valuetypes()) {
261 // Support i8/i16 atomic swap.
262 setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
264 // FIXME: Support "atmam" instructions.
265 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Expand);
266 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Expand);
267 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Expand);
268 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Expand);
270 // VE doesn't have the following instructions.
271 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Expand);
272 setOperationAction(ISD::ATOMIC_LOAD_CLR, VT, Expand);
273 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Expand);
274 setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
275 setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
276 setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
277 setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
278 setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
281 /// } Atomic instructions
283 /// SJLJ instructions {
284 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
285 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
286 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
287 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
288 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
289 /// } SJLJ instructions
291 // Intrinsic instructions
292 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
295 void VETargetLowering::initVPUActions() {
296 for (MVT LegalVecVT : AllVectorVTs) {
297 setOperationAction(ISD::BUILD_VECTOR, LegalVecVT, Custom);
298 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalVecVT, Legal);
299 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalVecVT, Legal);
300 // Translate all vector instructions with legal element types to VVP_*
301 // nodes.
302 // TODO We will custom-widen into VVP_* nodes in the future. While we are
303 // building the infrastructure for this, we only do this for legal vector
304 // VTs.
305 #define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
306 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
307 #define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
308 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
309 #include "VVPNodes.def"
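// Illustrative sketch (entry names assumed from the usual VVPNodes.def
// layout, not verbatim): an entry such as
//   ADD_VVP_OP(VVP_ADD, ADD)
// expands here to
//   setOperationAction(ISD::ADD, LegalVecVT, Custom);
// so vector ISD::ADD nodes get routed through the custom VVP lowering path.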
312 for (MVT LegalPackedVT : AllPackedVTs) {
313 setOperationAction(ISD::INSERT_VECTOR_ELT, LegalPackedVT, Custom);
314 setOperationAction(ISD::EXTRACT_VECTOR_ELT, LegalPackedVT, Custom);
318 SDValue
319 VETargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
320 bool IsVarArg,
321 const SmallVectorImpl<ISD::OutputArg> &Outs,
322 const SmallVectorImpl<SDValue> &OutVals,
323 const SDLoc &DL, SelectionDAG &DAG) const {
324 // CCValAssign - represent the assignment of the return value to locations.
325 SmallVector<CCValAssign, 16> RVLocs;
327 // CCState - Info about the registers and stack slot.
328 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
329 *DAG.getContext());
331 // Analyze return values.
332 CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
334 SDValue Flag;
335 SmallVector<SDValue, 4> RetOps(1, Chain);
337 // Copy the result values into the output registers.
338 for (unsigned i = 0; i != RVLocs.size(); ++i) {
339 CCValAssign &VA = RVLocs[i];
340 assert(VA.isRegLoc() && "Can only return in registers!");
341 assert(!VA.needsCustom() && "Unexpected custom lowering");
342 SDValue OutVal = OutVals[i];
344 // Integer return values must be sign or zero extended by the callee.
345 switch (VA.getLocInfo()) {
346 case CCValAssign::Full:
347 break;
348 case CCValAssign::SExt:
349 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
350 break;
351 case CCValAssign::ZExt:
352 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
353 break;
354 case CCValAssign::AExt:
355 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
356 break;
357 case CCValAssign::BCvt: {
358 // Convert a float return value to i64 with padding.
359 // 63 31 0
360 // +------+------+
361 // | float| 0 |
362 // +------+------+
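// For example, a returned 1.0f (bit pattern 0x3F800000) ends up as
// 0x3F800000 in bits 63..32 of the i64 location, with the padding shown
// above occupying bits 31..0.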
363 assert(VA.getLocVT() == MVT::i64);
364 assert(VA.getValVT() == MVT::f32);
365 SDValue Undef = SDValue(
366 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
367 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
368 OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
369 MVT::i64, Undef, OutVal, Sub_f32),
371 break;
373 default:
374 llvm_unreachable("Unknown loc info!");
377 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
379 // Guarantee that all emitted copies are stuck together with flags.
380 Flag = Chain.getValue(1);
381 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
384 RetOps[0] = Chain; // Update chain.
386 // Add the flag if we have it.
387 if (Flag.getNode())
388 RetOps.push_back(Flag);
390 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
393 SDValue VETargetLowering::LowerFormalArguments(
394 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
395 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
396 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
397 MachineFunction &MF = DAG.getMachineFunction();
399 // Get the base offset of the incoming arguments stack space.
400 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
401 // Get the size of the preserved arguments area
402 unsigned ArgsPreserved = 64;
404 // Analyze arguments according to CC_VE.
405 SmallVector<CCValAssign, 16> ArgLocs;
406 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
407 *DAG.getContext());
408 // Allocate the preserved area first.
409 CCInfo.AllocateStack(ArgsPreserved, Align(8));
410 // We already allocated the preserved area, so the stack offset computed
411 // by CC_VE would be correct now.
412 CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
414 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
415 CCValAssign &VA = ArgLocs[i];
416 assert(!VA.needsCustom() && "Unexpected custom lowering");
417 if (VA.isRegLoc()) {
418 // This argument is passed in a register.
419 // All integer register arguments are promoted by the caller to i64.
421 // Create a virtual register for the promoted live-in value.
422 unsigned VReg =
423 MF.addLiveIn(VA.getLocReg(), getRegClassFor(VA.getLocVT()));
424 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
426 // The caller promoted the argument, so insert an Assert?ext SDNode so we
427 // won't promote the value again in this function.
428 switch (VA.getLocInfo()) {
429 case CCValAssign::SExt:
430 Arg = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), Arg,
431 DAG.getValueType(VA.getValVT()));
432 break;
433 case CCValAssign::ZExt:
434 Arg = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), Arg,
435 DAG.getValueType(VA.getValVT()));
436 break;
437 case CCValAssign::BCvt: {
438 // Extract a float argument from i64 with padding.
439 // 63 31 0
440 // +------+------+
441 // | float| 0 |
442 // +------+------+
443 assert(VA.getLocVT() == MVT::i64);
444 assert(VA.getValVT() == MVT::f32);
445 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
446 Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
447 MVT::f32, Arg, Sub_f32),
449 break;
451 default:
452 break;
455 // Truncate the register down to the argument type.
456 if (VA.isExtInLoc())
457 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
459 InVals.push_back(Arg);
460 continue;
463 // The registers are exhausted. This argument was passed on the stack.
464 assert(VA.isMemLoc());
465 // The CC_VE_Full/Half functions compute stack offsets relative to the
466 // beginning of the arguments area at %fp + the size of reserved area.
467 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
468 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
470 // Adjust the offset for a float argument by adding 4, since the argument
471 // is stored in an 8-byte slot laid out as below. LLVM generates a 4-byte
472 // load instruction, so we need to adjust the offset here. This
473 // adjustment is required only in LowerFormalArguments. In LowerCall,
474 // a float argument is converted to i64 first and stored as 8 bytes of
475 // data, as required by the ABI, so no adjustment is needed there.
476 // 0 4
477 // +------+------+
478 // | empty| float|
479 // +------+------+
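// Worked example (assuming the usual 176-byte register save area): an f32
// argument whose CC-assigned slot starts at LocMemOffset 0 is loaded from
// frame offset 0 + 176 + 4 = 180, i.e. from the second (higher-addressed)
// half of its 8-byte slot.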
480 if (VA.getValVT() == MVT::f32)
481 Offset += 4;
483 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
484 InVals.push_back(
485 DAG.getLoad(VA.getValVT(), DL, Chain,
486 DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
487 MachinePointerInfo::getFixedStack(MF, FI)));
490 if (!IsVarArg)
491 return Chain;
493 // This function takes variable arguments, some of which may have been passed
494 // in registers %s0-%s8.
496 // The va_start intrinsic needs to know the offset to the first variable
497 // argument.
498 // TODO: need to calculate offset correctly once we support f128.
499 unsigned ArgOffset = ArgLocs.size() * 8;
500 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
501 // Skip the reserved area at the top of stack.
502 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
504 return Chain;
507 // FIXME? Maybe this could be a TableGen attribute on some registers and
508 // this table could be generated automatically from RegInfo.
509 Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
510 const MachineFunction &MF) const {
511 Register Reg = StringSwitch<Register>(RegName)
512 .Case("sp", VE::SX11) // Stack pointer
513 .Case("fp", VE::SX9) // Frame pointer
514 .Case("sl", VE::SX8) // Stack limit
515 .Case("lr", VE::SX10) // Link register
516 .Case("tp", VE::SX14) // Thread pointer
517 .Case("outer", VE::SX12) // Outer register
518 .Case("info", VE::SX17) // Info area register
519 .Case("got", VE::SX15) // Global offset table register
520 .Case("plt", VE::SX16) // Procedure linkage table register
521 .Default(0);
523 if (Reg)
524 return Reg;
526 report_fatal_error("Invalid register name global variable");
529 //===----------------------------------------------------------------------===//
530 // TargetLowering Implementation
531 //===----------------------------------------------------------------------===//
533 SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
534 SmallVectorImpl<SDValue> &InVals) const {
535 SelectionDAG &DAG = CLI.DAG;
536 SDLoc DL = CLI.DL;
537 SDValue Chain = CLI.Chain;
538 auto PtrVT = getPointerTy(DAG.getDataLayout());
540 // VE target does not yet support tail call optimization.
541 CLI.IsTailCall = false;
543 // Get the base offset of the outgoing arguments stack space.
544 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
545 // Get the size of the preserved arguments area
546 unsigned ArgsPreserved = 8 * 8u;
548 // Analyze operands of the call, assigning locations to each operand.
549 SmallVector<CCValAssign, 16> ArgLocs;
550 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
551 *DAG.getContext());
552 // Allocate the preserved area first.
553 CCInfo.AllocateStack(ArgsPreserved, Align(8));
554 // We already allocated the preserved area, so the stack offset computed
555 // by CC_VE would be correct now.
556 CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
558 // VE requires using both registers and the stack for varargs or
559 // unprototyped functions.
560 bool UseBoth = CLI.IsVarArg;
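// Illustrative example: for a varargs call such as printf("%d", x), each
// operand gets a location from the normal analysis above (ArgLocs) and an
// additional stack slot from CC_VE2 (ArgLocs2); register operands are then
// both copied into their register and stored to the stack in the loop below.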
562 // Analyze operands again if it is required to store BOTH.
563 SmallVector<CCValAssign, 16> ArgLocs2;
564 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
565 ArgLocs2, *DAG.getContext());
566 if (UseBoth)
567 CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
569 // Get the size of the outgoing arguments stack space requirement.
570 unsigned ArgsSize = CCInfo.getNextStackOffset();
572 // Keep stack frames 16-byte aligned.
573 ArgsSize = alignTo(ArgsSize, 16);
575 // Adjust the stack pointer to make room for the arguments.
576 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
577 // with more than 6 arguments.
578 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
580 // Collect the set of registers to pass to the function and their values.
581 // This will be emitted as a sequence of CopyToReg nodes glued to the call
582 // instruction.
583 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
585 // Collect chains from all the memory operations that copy arguments to the
586 // stack. They must follow the stack pointer adjustment above and precede the
587 // call instruction itself.
588 SmallVector<SDValue, 8> MemOpChains;
590 // VE needs the address of the callee function in a register,
591 // so prepare to copy it to SX12 here.
593 // If the callee is a GlobalAddress node (quite common, every direct call is)
594 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
595 // Likewise ExternalSymbol -> TargetExternalSymbol.
596 SDValue Callee = CLI.Callee;
598 bool IsPICCall = isPositionIndependent();
600 // PC-relative references to external symbols should go through $stub.
601 // If so, we need to prepare GlobalBaseReg first.
602 const TargetMachine &TM = DAG.getTarget();
603 const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
604 const GlobalValue *GV = nullptr;
605 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
606 if (CalleeG)
607 GV = CalleeG->getGlobal();
608 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
609 bool UsePlt = !Local;
610 MachineFunction &MF = DAG.getMachineFunction();
612 // Turn GlobalAddress/ExternalSymbol node into a value node
613 // containing the address of them here.
614 if (CalleeG) {
615 if (IsPICCall) {
616 if (UsePlt)
617 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
618 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
619 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
620 } else {
621 Callee =
622 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
624 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
625 if (IsPICCall) {
626 if (UsePlt)
627 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
628 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
629 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
630 } else {
631 Callee =
632 makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
636 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
638 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
639 CCValAssign &VA = ArgLocs[i];
640 SDValue Arg = CLI.OutVals[i];
642 // Promote the value if needed.
643 switch (VA.getLocInfo()) {
644 default:
645 llvm_unreachable("Unknown location info!");
646 case CCValAssign::Full:
647 break;
648 case CCValAssign::SExt:
649 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
650 break;
651 case CCValAssign::ZExt:
652 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
653 break;
654 case CCValAssign::AExt:
655 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
656 break;
657 case CCValAssign::BCvt: {
658 // Convert a float argument to i64 with padding.
659 // 63 31 0
660 // +------+------+
661 // | float| 0 |
662 // +------+------+
663 assert(VA.getLocVT() == MVT::i64);
664 assert(VA.getValVT() == MVT::f32);
665 SDValue Undef = SDValue(
666 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
667 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
668 Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
669 MVT::i64, Undef, Arg, Sub_f32),
671 break;
675 if (VA.isRegLoc()) {
676 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
677 if (!UseBoth)
678 continue;
679 VA = ArgLocs2[i];
682 assert(VA.isMemLoc());
684 // Create a store off the stack pointer for this argument.
685 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
686 // The argument area starts at %fp/%sp + the size of reserved area.
687 SDValue PtrOff =
688 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
689 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
690 MemOpChains.push_back(
691 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
694 // Emit all stores, make sure they occur before the call.
695 if (!MemOpChains.empty())
696 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
698 // Build a sequence of CopyToReg nodes glued together with token chain and
699 // glue operands which copy the outgoing args into registers. The InGlue is
700 // necessary since all emitted instructions must be stuck together in order
701 // to pass the live physical registers.
702 SDValue InGlue;
703 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
704 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
705 RegsToPass[i].second, InGlue);
706 InGlue = Chain.getValue(1);
709 // Build the operands for the call instruction itself.
710 SmallVector<SDValue, 8> Ops;
711 Ops.push_back(Chain);
712 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
713 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
714 RegsToPass[i].second.getValueType()));
716 // Add a register mask operand representing the call-preserved registers.
717 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
718 const uint32_t *Mask =
719 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
720 assert(Mask && "Missing call preserved mask for calling convention");
721 Ops.push_back(DAG.getRegisterMask(Mask));
723 // Make sure the CopyToReg nodes are glued to the call instruction which
724 // consumes the registers.
725 if (InGlue.getNode())
726 Ops.push_back(InGlue);
728 // Now the call itself.
729 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
730 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
731 InGlue = Chain.getValue(1);
733 // Revert the stack pointer immediately after the call.
734 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
735 DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
736 InGlue = Chain.getValue(1);
738 // Now extract the return values. This is more or less the same as
739 // LowerFormalArguments.
741 // Assign locations to each value returned by this call.
742 SmallVector<CCValAssign, 16> RVLocs;
743 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
744 *DAG.getContext());
746 // Set inreg flag manually for codegen generated library calls that
747 // return float.
748 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
749 CLI.Ins[0].Flags.setInReg();
751 RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
753 // Copy all of the result registers out of their specified physreg.
754 for (unsigned i = 0; i != RVLocs.size(); ++i) {
755 CCValAssign &VA = RVLocs[i];
756 assert(!VA.needsCustom() && "Unexpected custom lowering");
757 unsigned Reg = VA.getLocReg();
759 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
760 // reside in the same register in the high and low bits. Reuse the
761 // CopyFromReg previous node to avoid duplicate copies.
762 SDValue RV;
763 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
764 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
765 RV = Chain.getValue(0);
767 // But usually we'll create a new CopyFromReg for a different register.
768 if (!RV.getNode()) {
769 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
770 Chain = RV.getValue(1);
771 InGlue = Chain.getValue(2);
774 // The callee promoted the return value, so insert an Assert?ext SDNode so
775 // we won't promote the value again in this function.
776 switch (VA.getLocInfo()) {
777 case CCValAssign::SExt:
778 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
779 DAG.getValueType(VA.getValVT()));
780 break;
781 case CCValAssign::ZExt:
782 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
783 DAG.getValueType(VA.getValVT()));
784 break;
785 case CCValAssign::BCvt: {
786 // Extract a float return value from i64 with padding.
787 // 63 31 0
788 // +------+------+
789 // | float| 0 |
790 // +------+------+
791 assert(VA.getLocVT() == MVT::i64);
792 assert(VA.getValVT() == MVT::f32);
793 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
794 RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
795 MVT::f32, RV, Sub_f32),
797 break;
799 default:
800 break;
803 // Truncate the register down to the return value type.
804 if (VA.isExtInLoc())
805 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
807 InVals.push_back(RV);
810 return Chain;
813 bool VETargetLowering::isOffsetFoldingLegal(
814 const GlobalAddressSDNode *GA) const {
815 // VE uses 64 bit addressing, so we need multiple instructions to generate
816 // an address. Folding address with offset increases the number of
817 // instructions, so that we disable it here. Offsets will be folded in
818 // the DAG combine later if it worth to do so.
819 return false;
822 /// isFPImmLegal - Returns true if the target can instruction select the
823 /// specified FP immediate natively. If false, the legalizer will
824 /// materialize the FP immediate as a load from a constant pool.
825 bool VETargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
826 bool ForCodeSize) const {
827 return VT == MVT::f32 || VT == MVT::f64;
830 /// Determine if the target supports unaligned memory accesses.
832 /// This function returns true if the target allows unaligned memory accesses
833 /// of the specified type in the given address space. If true, it also returns
834 /// whether the unaligned memory access is "fast" in the last argument by
835 /// reference. This is used, for example, in situations where an array
836 /// copy/move/set is converted to a sequence of store operations. Its use
837 /// helps to ensure that such replacements don't generate code that causes an
838 /// alignment error (trap) on the target machine.
839 bool VETargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
840 unsigned AddrSpace,
841 Align A,
842 MachineMemOperand::Flags,
843 bool *Fast) const {
844 if (Fast) {
845 // It's fast anytime on VE
846 *Fast = true;
848 return true;
851 VETargetLowering::VETargetLowering(const TargetMachine &TM,
852 const VESubtarget &STI)
853 : TargetLowering(TM), Subtarget(&STI) {
854 // Instructions which use registers as conditionals examine all the
855 // bits (as does the pseudo SELECT_CC expansion). I don't think it
856 // matters much whether it's ZeroOrOneBooleanContent, or
857 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
858 // former.
859 setBooleanContents(ZeroOrOneBooleanContent);
860 setBooleanVectorContents(ZeroOrOneBooleanContent);
862 initRegisterClasses();
863 initSPUActions();
864 initVPUActions();
866 setStackPointerRegisterToSaveRestore(VE::SX11);
868 // We have target-specific dag combine patterns for the following nodes:
869 setTargetDAGCombine(ISD::TRUNCATE);
871 // Set function alignment to 16 bytes
872 setMinFunctionAlignment(Align(16));
874 // VE stores all arguments with 8-byte alignment
875 setMinStackArgumentAlignment(Align(8));
877 computeRegisterProperties(Subtarget->getRegisterInfo());
880 const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
881 #define TARGET_NODE_CASE(NAME) \
882 case VEISD::NAME: \
883 return "VEISD::" #NAME;
884 switch ((VEISD::NodeType)Opcode) {
885 case VEISD::FIRST_NUMBER:
886 break;
887 TARGET_NODE_CASE(CALL)
888 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
889 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
890 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
891 TARGET_NODE_CASE(GETFUNPLT)
892 TARGET_NODE_CASE(GETSTACKTOP)
893 TARGET_NODE_CASE(GETTLSADDR)
894 TARGET_NODE_CASE(GLOBAL_BASE_REG)
895 TARGET_NODE_CASE(Hi)
896 TARGET_NODE_CASE(Lo)
897 TARGET_NODE_CASE(MEMBARRIER)
898 TARGET_NODE_CASE(RET_FLAG)
899 TARGET_NODE_CASE(TS1AM)
900 TARGET_NODE_CASE(VEC_BROADCAST)
902 // Register the VVP_* SDNodes.
903 #define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
904 #include "VVPNodes.def"
906 #undef TARGET_NODE_CASE
907 return nullptr;
910 EVT VETargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
911 EVT VT) const {
912 return MVT::i32;
915 // Convert to a target node and set target flags.
916 SDValue VETargetLowering::withTargetFlags(SDValue Op, unsigned TF,
917 SelectionDAG &DAG) const {
918 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
919 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
920 GA->getValueType(0), GA->getOffset(), TF);
922 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
923 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
924 0, TF);
926 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
927 return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
928 CP->getAlign(), CP->getOffset(), TF);
930 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
931 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
932 TF);
934 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
935 return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
937 llvm_unreachable("Unhandled address SDNode");
940 // Split Op into high and low parts according to HiTF and LoTF.
941 // Return an ADD node combining the parts.
942 SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
943 SelectionDAG &DAG) const {
944 SDLoc DL(Op);
945 EVT VT = Op.getValueType();
946 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
947 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
948 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
951 // Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
952 // or ExternalSymbol SDNode.
953 SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
954 SDLoc DL(Op);
955 EVT PtrVT = Op.getValueType();
957 // Handle PIC mode first. VE needs a got load for every variable!
958 if (isPositionIndependent()) {
959 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
961 if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
962 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
963 // Create the following instructions for local-linkage PIC code.
964 // lea %reg, label@gotoff_lo
965 // and %reg, %reg, (32)0
966 // lea.sl %reg, label@gotoff_hi(%reg, %got)
967 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
968 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
969 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
970 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
972 // Create the following instructions for non-local-linkage PIC code.
973 // lea %reg, label@got_lo
974 // and %reg, %reg, (32)0
975 // lea.sl %reg, label@got_hi(%reg)
976 // ld %reg, (%reg, %got)
977 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOT_HI32,
978 VEMCExpr::VK_VE_GOT_LO32, DAG);
979 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
980 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
981 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
982 MachinePointerInfo::getGOT(DAG.getMachineFunction()));
985 // This is one of the absolute code models.
986 switch (getTargetMachine().getCodeModel()) {
987 default:
988 llvm_unreachable("Unsupported absolute code model");
989 case CodeModel::Small:
990 case CodeModel::Medium:
991 case CodeModel::Large:
992 // abs64.
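// Sketch of the expected expansion (instruction form is illustrative only):
//   lea %reg, sym@lo
//   and %reg, %reg, (32)0
//   lea.sl %reg, sym@hi(, %reg)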
993 return makeHiLoPair(Op, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
997 /// Custom Lower {
999 // The mappings for emitLeading/TrailingFence for VE are designed following
1000 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
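// In short, for the code below: release and acq_rel operations get a leading
// release fence; acquire and acq_rel operations get a trailing acquire fence;
// a seq_cst operation gets a leading seq_cst fence only if it stores, and
// always gets a trailing seq_cst fence.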
1001 Instruction *VETargetLowering::emitLeadingFence(IRBuilderBase &Builder,
1002 Instruction *Inst,
1003 AtomicOrdering Ord) const {
1004 switch (Ord) {
1005 case AtomicOrdering::NotAtomic:
1006 case AtomicOrdering::Unordered:
1007 llvm_unreachable("Invalid fence: unordered/non-atomic");
1008 case AtomicOrdering::Monotonic:
1009 case AtomicOrdering::Acquire:
1010 return nullptr; // Nothing to do
1011 case AtomicOrdering::Release:
1012 case AtomicOrdering::AcquireRelease:
1013 return Builder.CreateFence(AtomicOrdering::Release);
1014 case AtomicOrdering::SequentiallyConsistent:
1015 if (!Inst->hasAtomicStore())
1016 return nullptr; // Nothing to do
1017 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1019 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1022 Instruction *VETargetLowering::emitTrailingFence(IRBuilderBase &Builder,
1023 Instruction *Inst,
1024 AtomicOrdering Ord) const {
1025 switch (Ord) {
1026 case AtomicOrdering::NotAtomic:
1027 case AtomicOrdering::Unordered:
1028 llvm_unreachable("Invalid fence: unordered/not-atomic");
1029 case AtomicOrdering::Monotonic:
1030 case AtomicOrdering::Release:
1031 return nullptr; // Nothing to do
1032 case AtomicOrdering::Acquire:
1033 case AtomicOrdering::AcquireRelease:
1034 return Builder.CreateFence(AtomicOrdering::Acquire);
1035 case AtomicOrdering::SequentiallyConsistent:
1036 return Builder.CreateFence(AtomicOrdering::SequentiallyConsistent);
1038 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1041 SDValue VETargetLowering::lowerATOMIC_FENCE(SDValue Op,
1042 SelectionDAG &DAG) const {
1043 SDLoc DL(Op);
1044 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1045 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1046 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1047 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1049 // VE uses Release consistency, so we need a fence instruction if it is a
1050 // cross-thread fence.
1051 if (FenceSSID == SyncScope::System) {
1052 switch (FenceOrdering) {
1053 case AtomicOrdering::NotAtomic:
1054 case AtomicOrdering::Unordered:
1055 case AtomicOrdering::Monotonic:
1056 // No need to generate fencem instruction here.
1057 break;
1058 case AtomicOrdering::Acquire:
1059 // Generate "fencem 2" as acquire fence.
1060 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1061 DAG.getTargetConstant(2, DL, MVT::i32),
1062 Op.getOperand(0)),
1064 case AtomicOrdering::Release:
1065 // Generate "fencem 1" as release fence.
1066 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1067 DAG.getTargetConstant(1, DL, MVT::i32),
1068 Op.getOperand(0)),
1070 case AtomicOrdering::AcquireRelease:
1071 case AtomicOrdering::SequentiallyConsistent:
1072 // Generate "fencem 3" as acq_rel and seq_cst fence.
1073 // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
1074 // so seq_cst may require more instructions for them.
1075 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1076 DAG.getTargetConstant(3, DL, MVT::i32),
1077 Op.getOperand(0)),
1082 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1083 return DAG.getNode(VEISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1086 TargetLowering::AtomicExpansionKind
1087 VETargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
1088 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1089 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1090 return AtomicExpansionKind::None;
1092 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1094 // Otherwise, expand it using a compare-and-exchange instruction so that the
1095 // __sync_fetch_and_* functions are not called.
1096 return AtomicExpansionKind::CmpXChg;
1099 static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag,
1100 SDValue &Bits) {
1101 SDLoc DL(Op);
1102 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1103 SDValue Ptr = N->getOperand(1);
1104 SDValue Val = N->getOperand(2);
1105 EVT PtrVT = Ptr.getValueType();
1106 bool Byte = N->getMemoryVT() == MVT::i8;
1107 // Remainder = AND Ptr, 3
1108 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1109 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1110 // Bits = Remainder << 3
1111 // NewVal = Val << Bits
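// Worked example: for an i8 swap where (Ptr & 3) == 2, Remainder = 2,
// Flag = 1 << 2 = 0x4 (the 1-byte swap flag selecting byte 2 of the aligned
// word), Bits = 2 << 3 = 16, and NewVal = Val << 16, which places the new
// byte at its position within the aligned 32-bit word.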
1112 SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1113 SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1114 SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1115 : DAG.getConstant(3, DL, MVT::i32);
1116 Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1117 Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1118 return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1121 static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data,
1122 SDValue Bits) {
1123 SDLoc DL(Op);
1124 EVT VT = Data.getValueType();
1125 bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1126 // NewData = Data >> Bits
1127 // Result = NewData & 0xff ; If Byte is true (1 byte)
1128 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1130 SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1131 return DAG.getNode(ISD::AND, DL, VT,
1132 {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1135 SDValue VETargetLowering::lowerATOMIC_SWAP(SDValue Op,
1136 SelectionDAG &DAG) const {
1137 SDLoc DL(Op);
1138 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1140 if (N->getMemoryVT() == MVT::i8) {
1141 // For i8, use "ts1am"
1142 // Input:
1143 // ATOMIC_SWAP Ptr, Val, Order
1145 // Output:
1146 // Remainder = AND Ptr, 3
1147 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1148 // Bits = Remainder << 3
1149 // NewVal = Val << Bits
1151 // Aligned = AND Ptr, -4
1152 // Data = TS1AM Aligned, Flag, NewVal
1154 // NewData = Data >> Bits
1155 // Result = NewData & 0xff ; 1 byte result
1156 SDValue Flag;
1157 SDValue Bits;
1158 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1160 SDValue Ptr = N->getOperand(1);
1161 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1162 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1163 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1164 DAG.getVTList(Op.getNode()->getValueType(0),
1165 Op.getNode()->getValueType(1)),
1166 {N->getChain(), Aligned, Flag, NewVal},
1167 N->getMemOperand());
1169 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1170 SDValue Chain = TS1AM.getValue(1);
1171 return DAG.getMergeValues({Result, Chain}, DL);
1173 if (N->getMemoryVT() == MVT::i16) {
1174 // For i16, use "ts1am"
1175 SDValue Flag;
1176 SDValue Bits;
1177 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1179 SDValue Ptr = N->getOperand(1);
1180 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1181 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1182 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1183 DAG.getVTList(Op.getNode()->getValueType(0),
1184 Op.getNode()->getValueType(1)),
1185 {N->getChain(), Aligned, Flag, NewVal},
1186 N->getMemOperand());
1188 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1189 SDValue Chain = TS1AM.getValue(1);
1190 return DAG.getMergeValues({Result, Chain}, DL);
1192 // Otherwise, let llvm legalize it.
1193 return Op;
1196 SDValue VETargetLowering::lowerGlobalAddress(SDValue Op,
1197 SelectionDAG &DAG) const {
1198 return makeAddress(Op, DAG);
1201 SDValue VETargetLowering::lowerBlockAddress(SDValue Op,
1202 SelectionDAG &DAG) const {
1203 return makeAddress(Op, DAG);
1206 SDValue VETargetLowering::lowerConstantPool(SDValue Op,
1207 SelectionDAG &DAG) const {
1208 return makeAddress(Op, DAG);
1211 SDValue
1212 VETargetLowering::lowerToTLSGeneralDynamicModel(SDValue Op,
1213 SelectionDAG &DAG) const {
1214 SDLoc DL(Op);
1216 // Generate the following code:
1217 // t1: ch,glue = callseq_start t0, 0, 0
1218 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1219 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1220 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
1221 SDValue Label = withTargetFlags(Op, 0, DAG);
1222 EVT PtrVT = Op.getValueType();
1224 // Lowering the machine isd will make sure everything is in the right
1225 // location.
1226 SDValue Chain = DAG.getEntryNode();
1227 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1228 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1229 DAG.getMachineFunction(), CallingConv::C);
1230 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1231 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1232 Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1233 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(64, DL, true),
1234 DAG.getIntPtrConstant(0, DL, true),
1235 Chain.getValue(1), DL);
1236 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1238 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1239 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1240 MFI.setHasCalls(true);
1242 // Also generate code to prepare a GOT register if it is PIC.
1243 if (isPositionIndependent()) {
1244 MachineFunction &MF = DAG.getMachineFunction();
1245 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1248 return Chain;
1251 SDValue VETargetLowering::lowerGlobalTLSAddress(SDValue Op,
1252 SelectionDAG &DAG) const {
1253 // The current implementation of nld (2.26) doesn't allow local exec model
1254 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1255 // generate the general dynamic model code sequence.
1257 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1258 return lowerToTLSGeneralDynamicModel(Op, DAG);
1261 SDValue VETargetLowering::lowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
1262 return makeAddress(Op, DAG);
1265 // Lower a f128 load into two f64 loads.
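// Sketch of the expansion performed below: a 16-byte load from `addr` becomes
//   Lo64 = load f64 from addr
//   Hi64 = load f64 from addr + 8
// and the halves are reassembled into an f128 with INSERT_SUBREG, Hi64 going
// to sub_even and Lo64 to sub_odd, matching the even/odd register-pair layout
// used for f128.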
1266 static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG) {
1267 SDLoc DL(Op);
1268 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1269 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1270 unsigned Alignment = LdNode->getAlign().value();
1271 if (Alignment > 8)
1272 Alignment = 8;
1274 SDValue Lo64 =
1275 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1276 LdNode->getPointerInfo(), Alignment,
1277 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1278 : MachineMemOperand::MONone);
1279 EVT AddrVT = LdNode->getBasePtr().getValueType();
1280 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1281 DAG.getConstant(8, DL, AddrVT));
1282 SDValue Hi64 =
1283 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1284 LdNode->getPointerInfo(), Alignment,
1285 LdNode->isVolatile() ? MachineMemOperand::MOVolatile
1286 : MachineMemOperand::MONone);
1288 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1289 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1291 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1292 SDNode *InFP128 =
1293 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1294 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1295 SDValue(InFP128, 0), Hi64, SubRegEven);
1296 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1297 SDValue(InFP128, 0), Lo64, SubRegOdd);
1298 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1299 SDValue(Hi64.getNode(), 1)};
1300 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1301 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1302 return DAG.getMergeValues(Ops, DL);
1305 SDValue VETargetLowering::lowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1306 LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1308 SDValue BasePtr = LdNode->getBasePtr();
1309 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1310 // Do not expand a load instruction with a frame index here because of
1311 // dependency problems. We expand it later in eliminateFrameIndex().
1312 return Op;
1315 EVT MemVT = LdNode->getMemoryVT();
1316 if (MemVT == MVT::f128)
1317 return lowerLoadF128(Op, DAG);
1319 return Op;
1322 // Lower a f128 store into two f64 stores.
1323 static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG) {
1324 SDLoc DL(Op);
1325 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1326 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1328 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1329 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1331 SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1332 StNode->getValue(), SubRegEven);
1333 SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1334 StNode->getValue(), SubRegOdd);
1336 unsigned Alignment = StNode->getAlign().value();
1337 if (Alignment > 8)
1338 Alignment = 8;
1340 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1341 SDValue OutChains[2];
1342 OutChains[0] =
1343 DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1344 StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1345 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1346 : MachineMemOperand::MONone);
1347 EVT AddrVT = StNode->getBasePtr().getValueType();
1348 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1349 DAG.getConstant(8, DL, AddrVT));
1350 OutChains[1] =
1351 DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1352 MachinePointerInfo(), Alignment,
1353 StNode->isVolatile() ? MachineMemOperand::MOVolatile
1354 : MachineMemOperand::MONone);
1355 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1358 SDValue VETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1359 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1360 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1362 SDValue BasePtr = StNode->getBasePtr();
1363 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1364 // Do not expand store instruction with frame index here because of
1365 // dependency problems. We expand it later in eliminateFrameIndex().
1366 return Op;
1369 EVT MemVT = StNode->getMemoryVT();
1370 if (MemVT == MVT::f128)
1371 return lowerStoreF128(Op, DAG);
1373 // Otherwise, ask llvm to expand it.
1374 return SDValue();
1377 SDValue VETargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
1378 MachineFunction &MF = DAG.getMachineFunction();
1379 VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
1380 auto PtrVT = getPointerTy(DAG.getDataLayout());
1382 // Need frame address to find the address of VarArgsFrameIndex.
1383 MF.getFrameInfo().setFrameAddressIsTaken(true);
1385 // vastart just stores the address of the VarArgsFrameIndex slot into the
1386 // memory location argument.
1387 SDLoc DL(Op);
1388 SDValue Offset =
1389 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1390 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1391 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1392 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1393 MachinePointerInfo(SV));
1396 SDValue VETargetLowering::lowerVAARG(SDValue Op, SelectionDAG &DAG) const {
1397 SDNode *Node = Op.getNode();
1398 EVT VT = Node->getValueType(0);
1399 SDValue InChain = Node->getOperand(0);
1400 SDValue VAListPtr = Node->getOperand(1);
1401 EVT PtrVT = VAListPtr.getValueType();
1402 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1403 SDLoc DL(Node);
1404 SDValue VAList =
1405 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1406 SDValue Chain = VAList.getValue(1);
1407 SDValue NextPtr;
1409 if (VT == MVT::f128) {
1410 // VE f128 values must be stored with 16-byte alignment. We don't
1411 // know the actual alignment of VAList, so we enforce its alignment
1412 // dynamically.
1413 int Align = 16;
1414 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1415 DAG.getConstant(Align - 1, DL, PtrVT));
1416 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1417 DAG.getConstant(-Align, DL, PtrVT));
1418 // Increment the pointer, VAList, by 16 to the next vaarg.
1419 NextPtr =
1420 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1421 } else if (VT == MVT::f32) {
1422 // float --> need special handling like below.
1423 // 0 4
1424 // +------+------+
1425 // | empty| float|
1426 // +------+------+
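// Example: if VAList currently points at the start of the slot above, the
// float itself is loaded from VAList + 4, while NextPtr still advances by
// the full 8-byte slot.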
1427 // Increment the pointer, VAList, by 8 to the next vaarg.
1428 NextPtr =
1429 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1430 // Then, adjust VAList.
1431 unsigned InternalOffset = 4;
1432 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1433 DAG.getConstant(InternalOffset, DL, PtrVT));
1434 } else {
1435 // Increment the pointer, VAList, by 8 to the next vaarg.
1436 NextPtr =
1437 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1440 // Store the incremented VAList to the legalized pointer.
1441 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1443 // Load the actual argument out of the pointer VAList.
1444 // We can't count on greater alignment than the word size.
1445 return DAG.getLoad(VT, DL, InChain, VAList, MachinePointerInfo(),
1446 std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8);
1449 SDValue VETargetLowering::lowerDYNAMIC_STACKALLOC(SDValue Op,
1450 SelectionDAG &DAG) const {
1451 // Generate the following code.
1452 // (void)__ve_grow_stack(size);
1453 // ret = GETSTACKTOP; // pseudo instruction
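// When an over-aligned allocation is requested, the sketch becomes
// (helper names taken from the code below):
//   (void)__ve_grow_stack_align(size, ~(align - 1));
//   ret = (GETSTACKTOP + align - 1) & ~(align - 1);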
1454 SDLoc DL(Op);
1456 // Get the inputs.
1457 SDNode *Node = Op.getNode();
1458 SDValue Chain = Op.getOperand(0);
1459 SDValue Size = Op.getOperand(1);
1460 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1461 EVT VT = Node->getValueType(0);
1463 // Chain the dynamic stack allocation so that it doesn't modify the stack
1464 // pointer when other instructions are using the stack.
1465 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1467 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1468 Align StackAlign = TFI.getStackAlign();
1469 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1471 // Prepare arguments
1472 TargetLowering::ArgListTy Args;
1473 TargetLowering::ArgListEntry Entry;
1474 Entry.Node = Size;
1475 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1476 Args.push_back(Entry);
1477 if (NeedsAlign) {
1478 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1479 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1480 Args.push_back(Entry);
1482 Type *RetTy = Type::getVoidTy(*DAG.getContext());
1484 EVT PtrVT = Op.getValueType();
1485 SDValue Callee;
1486 if (NeedsAlign) {
1487 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1488 } else {
1489 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1492 TargetLowering::CallLoweringInfo CLI(DAG);
1493 CLI.setDebugLoc(DL)
1494 .setChain(Chain)
1495 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1496 .setDiscardResult(true);
1497 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1498 Chain = pair.second;
1499 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1500 if (NeedsAlign) {
1501 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1502 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1503 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1504 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1506 // Chain = Result.getValue(1);
1507 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
1508 DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
1510 SDValue Ops[2] = {Result, Chain};
1511 return DAG.getMergeValues(Ops, DL);
1514 SDValue VETargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
1515 SelectionDAG &DAG) const {
1516 SDLoc DL(Op);
1517 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1518 Op.getOperand(1));
1521 SDValue VETargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
1522 SelectionDAG &DAG) const {
1523 SDLoc DL(Op);
1524 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1525 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1526 Op.getOperand(1));
1529 SDValue VETargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op,
1530 SelectionDAG &DAG) const {
1531 SDLoc DL(Op);
1532 return DAG.getNode(VEISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other,
1533 Op.getOperand(0));
1536 static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
1537 const VETargetLowering &TLI,
1538 const VESubtarget *Subtarget) {
1539 SDLoc DL(Op);
1540 MachineFunction &MF = DAG.getMachineFunction();
1541 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1543 MachineFrameInfo &MFI = MF.getFrameInfo();
1544 MFI.setFrameAddressIsTaken(true);
1546 unsigned Depth = Op.getConstantOperandVal(0);
1547 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1548 unsigned FrameReg = RegInfo->getFrameRegister(MF);
1549 SDValue FrameAddr =
1550 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
1551 while (Depth--)
1552 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1553 FrameAddr, MachinePointerInfo());
1554 return FrameAddr;
1557 static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
1558 const VETargetLowering &TLI,
1559 const VESubtarget *Subtarget) {
1560 MachineFunction &MF = DAG.getMachineFunction();
1561 MachineFrameInfo &MFI = MF.getFrameInfo();
1562 MFI.setReturnAddressIsTaken(true);
1564 if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
1565 return SDValue();
1567 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1569 SDLoc DL(Op);
1570 EVT VT = Op.getValueType();
1571 SDValue Offset = DAG.getConstant(8, DL, VT);
1572 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1573 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1574 MachinePointerInfo());
1577 SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
1578 SelectionDAG &DAG) const {
1579 SDLoc DL(Op);
1580 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1581 switch (IntNo) {
1582 default: // Don't custom lower most intrinsics.
1583 return SDValue();
1584 case Intrinsic::eh_sjlj_lsda: {
1585 MachineFunction &MF = DAG.getMachineFunction();
1586 MVT VT = Op.getSimpleValueType();
1587 const VETargetMachine *TM =
1588 static_cast<const VETargetMachine *>(&DAG.getTarget());
1590 // Create the GCC_except_tableXX string. The real symbol for it will be
1591 // generated in EHStreamer::emitExceptionTable() later, so we just
1592 // borrow its name here.
1593 TM->getStrList()->push_back(std::string(
1594 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1595 SDValue Addr =
1596 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1597 if (isPositionIndependent()) {
1598 Addr = makeHiLoPair(Addr, VEMCExpr::VK_VE_GOTOFF_HI32,
1599 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1600 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1601 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1603 return makeHiLoPair(Addr, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
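/// Return true if \p N is a BUILD_VECTOR with exactly one non-undef operand,
/// e.g. (build_vector undef, undef, %x, undef, ...); in that case \p UniqueIdx
/// is set to the position of that operand (2 in this example).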
1608 static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1609 if (!isa<BuildVectorSDNode>(N))
1610 return false;
1611 const auto *BVN = cast<BuildVectorSDNode>(N);
1613 // Find first non-undef insertion.
1614 unsigned Idx;
1615 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1616 auto ElemV = BVN->getOperand(Idx);
1617 if (!ElemV->isUndef())
1618 break;
1620 // Catch the (hypothetical) all-undef case.
1621 if (Idx == BVN->getNumOperands())
1622 return false;
1623 // Remember insertion.
1624 UniqueIdx = Idx++;
1625 // Verify that all other operands are undef.
1626 for (; Idx < BVN->getNumOperands(); ++Idx) {
1627 auto ElemV = BVN->getOperand(Idx);
1628 if (!ElemV->isUndef())
1629 return false;
1631 return true;
1634 static SDValue getSplatValue(SDNode *N) {
1635 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1636 return BuildVec->getSplatValue();
1638 return SDValue();
1641 SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
1642 SelectionDAG &DAG) const {
1643 SDLoc DL(Op);
1644 unsigned NumEls = Op.getValueType().getVectorNumElements();
1645 MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
1647 // If there is just one element, expand to INSERT_VECTOR_ELT.
1648 unsigned UniqueIdx;
1649 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1650 SDValue AccuV = DAG.getUNDEF(Op.getValueType());
1651 auto ElemV = Op->getOperand(UniqueIdx);
1652 SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
1653 return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
1654 ElemV, IdxV);
1657 // Else emit a broadcast.
1658 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1659 // lower to VEC_BROADCAST
1660 MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
1662 auto AVL = DAG.getConstant(NumEls, DL, MVT::i32);
1663 return DAG.getNode(VEISD::VEC_BROADCAST, DL, LegalResVT, Op.getOperand(0),
1664 AVL);
1667 // Expand
1668 return SDValue();
1671 SDValue VETargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
1672 unsigned Opcode = Op.getOpcode();
1673 if (ISD::isVPOpcode(Opcode))
1674 return lowerToVVP(Op, DAG);
1676 switch (Opcode) {
1677 default:
1678 llvm_unreachable("Should not custom lower this!");
1679 case ISD::ATOMIC_FENCE:
1680 return lowerATOMIC_FENCE(Op, DAG);
1681 case ISD::ATOMIC_SWAP:
1682 return lowerATOMIC_SWAP(Op, DAG);
1683 case ISD::BlockAddress:
1684 return lowerBlockAddress(Op, DAG);
1685 case ISD::ConstantPool:
1686 return lowerConstantPool(Op, DAG);
1687 case ISD::DYNAMIC_STACKALLOC:
1688 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1689 case ISD::EH_SJLJ_LONGJMP:
1690 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1691 case ISD::EH_SJLJ_SETJMP:
1692 return lowerEH_SJLJ_SETJMP(Op, DAG);
1693 case ISD::EH_SJLJ_SETUP_DISPATCH:
1694 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1695 case ISD::FRAMEADDR:
1696 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1697 case ISD::GlobalAddress:
1698 return lowerGlobalAddress(Op, DAG);
1699 case ISD::GlobalTLSAddress:
1700 return lowerGlobalTLSAddress(Op, DAG);
1701 case ISD::INTRINSIC_WO_CHAIN:
1702 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1703 case ISD::JumpTable:
1704 return lowerJumpTable(Op, DAG);
1705 case ISD::LOAD:
1706 return lowerLOAD(Op, DAG);
1707 case ISD::RETURNADDR:
1708 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1709 case ISD::BUILD_VECTOR:
1710 return lowerBUILD_VECTOR(Op, DAG);
1711 case ISD::STORE:
1712 return lowerSTORE(Op, DAG);
1713 case ISD::VASTART:
1714 return lowerVASTART(Op, DAG);
1715 case ISD::VAARG:
1716 return lowerVAARG(Op, DAG);
1718 case ISD::INSERT_VECTOR_ELT:
1719 return lowerINSERT_VECTOR_ELT(Op, DAG);
1720 case ISD::EXTRACT_VECTOR_ELT:
1721 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1723 #define ADD_BINARY_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1724 #include "VVPNodes.def"
1725 return lowerToVVP(Op, DAG);
1728 /// } Custom Lower
1730 void VETargetLowering::ReplaceNodeResults(SDNode *N,
1731 SmallVectorImpl<SDValue> &Results,
1732 SelectionDAG &DAG) const {
1733 switch (N->getOpcode()) {
1734 case ISD::ATOMIC_SWAP:
1735 // Let LLVM expand the atomic swap instruction through LowerOperation.
1736 return;
1737 default:
1738 LLVM_DEBUG(N->dumpr(&DAG));
1739 llvm_unreachable("Do not know how to custom type legalize this operation!");
1743 /// JumpTable for VE.
1745 /// VE cannot generate a relocatable symbol in a jump table. VE cannot
1746 /// generate expressions using symbols from both the text segment and the
1747 /// data segment, such as:
1748 /// .4byte .LBB0_2-.LJTI0_0
1749 /// So, we instead generate an offset from the top of the function as a
1750 /// custom label, like below.
1751 /// .4byte .LBB0_2-<function name>
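/// For illustration, a PIC jump table for a function "fun" with two
/// destinations would then contain entries like:
///      .4byte  .LBB0_2-fun
///      .4byte  .LBB0_3-fun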
1753 unsigned VETargetLowering::getJumpTableEncoding() const {
1754 // Use custom label for PIC.
1755 if (isPositionIndependent())
1756 return MachineJumpTableInfo::EK_Custom32;
1758 // Otherwise, use the normal jump table encoding heuristics.
1759 return TargetLowering::getJumpTableEncoding();
1762 const MCExpr *VETargetLowering::LowerCustomJumpTableEntry(
1763 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1764 unsigned Uid, MCContext &Ctx) const {
1765 assert(isPositionIndependent());
1767 // Generate a custom label for PIC like below.
1768 // .4byte .LBB0_2-<function name>
1769 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1770 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1771 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1772 return MCBinaryExpr::createSub(Value, Base, Ctx);
1775 SDValue VETargetLowering::getPICJumpTableRelocBase(SDValue Table,
1776 SelectionDAG &DAG) const {
1777 assert(isPositionIndependent());
1778 SDLoc DL(Table);
1779 Function *Function = &DAG.getMachineFunction().getFunction();
1780 assert(Function != nullptr);
1781 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
1783 // In the jump table, we have the following values in PIC mode.
1784 // .4byte .LBB0_2-<function name>
1785 // We need to add this value and the address of this function to generate
1786 // the .LBB0_2 label correctly under PIC mode. So, we want to generate the
1787 // following instructions:
1788 // lea %reg, fun@gotoff_lo
1789 // and %reg, %reg, (32)0
1790 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
1791 // In order to do so, we need to generate a correctly marked DAG node using
1792 // makeHiLoPair.
1793 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
1794 SDValue HiLo = makeHiLoPair(Op, VEMCExpr::VK_VE_GOTOFF_HI32,
1795 VEMCExpr::VK_VE_GOTOFF_LO32, DAG);
1796 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
1797 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
1800 Register VETargetLowering::prepareMBB(MachineBasicBlock &MBB,
1801 MachineBasicBlock::iterator I,
1802 MachineBasicBlock *TargetBB,
1803 const DebugLoc &DL) const {
1804 MachineFunction *MF = MBB.getParent();
1805 MachineRegisterInfo &MRI = MF->getRegInfo();
1806 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1808 const TargetRegisterClass *RC = &VE::I64RegClass;
1809 Register Tmp1 = MRI.createVirtualRegister(RC);
1810 Register Tmp2 = MRI.createVirtualRegister(RC);
1811 Register Result = MRI.createVirtualRegister(RC);
1813 if (isPositionIndependent()) {
1814 // Create the following instructions for local linkage PIC code.
1815 // lea %Tmp1, TargetBB@gotoff_lo
1816 // and %Tmp2, %Tmp1, (32)0
1817 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1818 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1819 .addImm(0)
1820 .addImm(0)
1821 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_LO32);
1822 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1823 .addReg(Tmp1, getKillRegState(true))
1824 .addImm(M0(32));
1825 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1826 .addReg(VE::SX15)
1827 .addReg(Tmp2, getKillRegState(true))
1828 .addMBB(TargetBB, VEMCExpr::VK_VE_GOTOFF_HI32);
1829 } else {
1830 // Create the following instructions for non-PIC code.
1831 // lea %Tmp1, TargetBB@lo
1832 // and %Tmp2, %Tmp1, (32)0
1833 // lea.sl %Result, TargetBB@hi(%Tmp2)
1834 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1835 .addImm(0)
1836 .addImm(0)
1837 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
1838 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1839 .addReg(Tmp1, getKillRegState(true))
1840 .addImm(M0(32));
1841 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
1842 .addReg(Tmp2, getKillRegState(true))
1843 .addImm(0)
1844 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
1846 return Result;
1849 Register VETargetLowering::prepareSymbol(MachineBasicBlock &MBB,
1850 MachineBasicBlock::iterator I,
1851 StringRef Symbol, const DebugLoc &DL,
1852 bool IsLocal = false,
1853 bool IsCall = false) const {
1854 MachineFunction *MF = MBB.getParent();
1855 MachineRegisterInfo &MRI = MF->getRegInfo();
1856 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1858 const TargetRegisterClass *RC = &VE::I64RegClass;
1859 Register Result = MRI.createVirtualRegister(RC);
1861 if (isPositionIndependent()) {
1862 if (IsCall && !IsLocal) {
1863 // Create the following instructions for non-local linkage PIC code function
1864 // calls. These instructions use the IC and the magic number -24, so we expand
1865 // them in VEAsmPrinter.cpp from the GETFUNPLT pseudo instruction.
1866 // lea %Reg, Symbol@plt_lo(-24)
1867 // and %Reg, %Reg, (32)0
1868 // sic %s16
1869 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
1870 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
1871 .addExternalSymbol("abort");
1872 } else if (IsLocal) {
1873 Register Tmp1 = MRI.createVirtualRegister(RC);
1874 Register Tmp2 = MRI.createVirtualRegister(RC);
1875 // Create the following instructions for local linkage PIC code.
1876 // lea %Tmp1, Symbol@gotoff_lo
1877 // and %Tmp2, %Tmp1, (32)0
1878 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
1879 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1880 .addImm(0)
1881 .addImm(0)
1882 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_LO32);
1883 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1884 .addReg(Tmp1, getKillRegState(true))
1885 .addImm(M0(32));
1886 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
1887 .addReg(VE::SX15)
1888 .addReg(Tmp2, getKillRegState(true))
1889 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOTOFF_HI32);
1890 } else {
1891 Register Tmp1 = MRI.createVirtualRegister(RC);
1892 Register Tmp2 = MRI.createVirtualRegister(RC);
1893 // Create the following instructions for non-local linkage PIC code.
1894 // lea %Tmp1, Symbol@got_lo
1895 // and %Tmp2, %Tmp1, (32)0
1896 // lea.sl %Tmp3, Symbol@got_hi(%Tmp2, %s15) ; %s15 is GOT
1897 // ld %Result, 0(%Tmp3)
1898 Register Tmp3 = MRI.createVirtualRegister(RC);
1899 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1900 .addImm(0)
1901 .addImm(0)
1902 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_LO32);
1903 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1904 .addReg(Tmp1, getKillRegState(true))
1905 .addImm(M0(32));
1906 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
1907 .addReg(VE::SX15)
1908 .addReg(Tmp2, getKillRegState(true))
1909 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_GOT_HI32);
1910 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
1911 .addReg(Tmp3, getKillRegState(true))
1912 .addImm(0)
1913 .addImm(0);
1915 } else {
1916 Register Tmp1 = MRI.createVirtualRegister(RC);
1917 Register Tmp2 = MRI.createVirtualRegister(RC);
1918 // Create the following instructions for non-PIC code.
1919 // lea %Tmp1, Symbol@lo
1920 // and %Tmp2, %Tmp1, (32)0
1921 // lea.sl %Result, Symbol@hi(%Tmp2)
1922 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
1923 .addImm(0)
1924 .addImm(0)
1925 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
1926 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
1927 .addReg(Tmp1, getKillRegState(true))
1928 .addImm(M0(32));
1929 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
1930 .addReg(Tmp2, getKillRegState(true))
1931 .addImm(0)
1932 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
1934 return Result;
1937 void VETargetLowering::setupEntryBlockForSjLj(MachineInstr &MI,
1938 MachineBasicBlock *MBB,
1939 MachineBasicBlock *DispatchBB,
1940 int FI, int Offset) const {
1941 DebugLoc DL = MI.getDebugLoc();
1942 const VEInstrInfo *TII = Subtarget->getInstrInfo();
1944 Register LabelReg =
1945 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), DispatchBB, DL);
1947 // Store the address of DispatchBB into the given jmpbuf[1], which holds the
1948 // next IC referenced by longjmp (throw) later.
1949 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
1950 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
1951 MIB.addReg(LabelReg, getKillRegState(true));
1954 MachineBasicBlock *
1955 VETargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
1956 MachineBasicBlock *MBB) const {
1957 DebugLoc DL = MI.getDebugLoc();
1958 MachineFunction *MF = MBB->getParent();
1959 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
1960 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
1961 MachineRegisterInfo &MRI = MF->getRegInfo();
1963 const BasicBlock *BB = MBB->getBasicBlock();
1964 MachineFunction::iterator I = ++MBB->getIterator();
1966 // Memory Reference.
1967 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
1968 MI.memoperands_end());
1969 Register BufReg = MI.getOperand(1).getReg();
1971 Register DstReg;
1973 DstReg = MI.getOperand(0).getReg();
1974 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
1975 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
1976 (void)TRI;
1977 Register MainDestReg = MRI.createVirtualRegister(RC);
1978 Register RestoreDestReg = MRI.createVirtualRegister(RC);
1980 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate the following
1981 // instructions. SP/FP must be saved in the jmpbuf before `llvm.eh.sjlj.setjmp`.
1983 // ThisMBB:
1984 // buf[3] = %s17 iff %s17 is used as BP
1985 // buf[1] = RestoreMBB as IC after longjmp
1986 // # SjLjSetup RestoreMBB
1988 // MainMBB:
1989 // v_main = 0
1991 // SinkMBB:
1992 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
1993 // ...
1995 // RestoreMBB:
1996 // %s17 = buf[3] iff %s17 is used as BP
1997 // v_restore = 1
1998 // goto SinkMBB
2000 MachineBasicBlock *ThisMBB = MBB;
2001 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2002 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2003 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
2004 MF->insert(I, MainMBB);
2005 MF->insert(I, SinkMBB);
2006 MF->push_back(RestoreMBB);
2007 RestoreMBB->setHasAddressTaken();
2009 // Transfer the remainder of BB and its successor edges to SinkMBB.
2010 SinkMBB->splice(SinkMBB->begin(), MBB,
2011 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2012 SinkMBB->transferSuccessorsAndUpdatePHIs(MBB);
2014 // ThisMBB:
2015 Register LabelReg =
2016 prepareMBB(*MBB, MachineBasicBlock::iterator(MI), RestoreMBB, DL);
2018 // Store BP in buf[3] iff this function is using BP.
2019 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2020 if (TFI->hasBP(*MF)) {
2021 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2022 MIB.addReg(BufReg);
2023 MIB.addImm(0);
2024 MIB.addImm(24);
2025 MIB.addReg(VE::SX17);
2026 MIB.setMemRefs(MMOs);
2029 // Store IP in buf[1].
2030 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2031 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2032 MIB.addImm(0);
2033 MIB.addImm(8);
2034 MIB.addReg(LabelReg, getKillRegState(true));
2035 MIB.setMemRefs(MMOs);
2037 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2039 // Insert setup.
2040 MIB =
2041 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2043 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2044 MIB.addRegMask(RegInfo->getNoPreservedMask());
2045 ThisMBB->addSuccessor(MainMBB);
2046 ThisMBB->addSuccessor(RestoreMBB);
2048 // MainMBB:
2049 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2050 .addImm(0)
2051 .addImm(0)
2052 .addImm(0);
2053 MainMBB->addSuccessor(SinkMBB);
2055 // SinkMBB:
2056 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2057 .addReg(MainDestReg)
2058 .addMBB(MainMBB)
2059 .addReg(RestoreDestReg)
2060 .addMBB(RestoreMBB);
2062 // RestoreMBB:
2063 // Restore BP from buf[3] iff this function is using BP. The address of
2064 // buf is in SX10.
2065 // FIXME: Better to not use SX10 here
2066 if (TFI->hasBP(*MF)) {
2067 MachineInstrBuilder MIB =
2068 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2069 MIB.addReg(VE::SX10);
2070 MIB.addImm(0);
2071 MIB.addImm(24);
2072 MIB.setMemRefs(MMOs);
2074 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2075 .addImm(0)
2076 .addImm(0)
2077 .addImm(1);
2078 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2079 RestoreMBB->addSuccessor(SinkMBB);
2081 MI.eraseFromParent();
2082 return SinkMBB;
2085 MachineBasicBlock *
2086 VETargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
2087 MachineBasicBlock *MBB) const {
2088 DebugLoc DL = MI.getDebugLoc();
2089 MachineFunction *MF = MBB->getParent();
2090 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2091 MachineRegisterInfo &MRI = MF->getRegInfo();
2093 // Memory Reference.
2094 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2095 MI.memoperands_end());
2096 Register BufReg = MI.getOperand(0).getReg();
2098 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2099 // Since FP is only updated here but NOT referenced, it's treated as a GPR.
2100 Register FP = VE::SX9;
2101 Register SP = VE::SX11;
2103 MachineInstrBuilder MIB;
2105 MachineBasicBlock *ThisMBB = MBB;
2107 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate the following instructions.
2109 // ThisMBB:
2110 // %fp = load buf[0]
2111 // %jmp = load buf[1]
2112 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2113 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2114 // jmp %jmp
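// For reference, the jmpbuf layout assumed by this lowering is:
//   buf[0] (offset  0): FP, buf[1] (offset 8): IC to jump to,
//   buf[2] (offset 16): SP, buf[3] (offset 24): BP (only if BP is used).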
2116 // Reload FP.
2117 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2118 MIB.addReg(BufReg);
2119 MIB.addImm(0);
2120 MIB.addImm(0);
2121 MIB.setMemRefs(MMOs);
2123 // Reload IP.
2124 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2125 MIB.addReg(BufReg);
2126 MIB.addImm(0);
2127 MIB.addImm(8);
2128 MIB.setMemRefs(MMOs);
2130 // Copy BufReg to SX10 for later use in setjmp.
2131 // FIXME: Better to not use SX10 here
2132 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2133 .addReg(BufReg)
2134 .addImm(0);
2136 // Reload SP.
2137 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2138 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2139 MIB.addImm(0);
2140 MIB.addImm(16);
2141 MIB.setMemRefs(MMOs);
2143 // Jump.
2144 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2145 .addReg(Tmp, getKillRegState(true))
2146 .addImm(0);
2148 MI.eraseFromParent();
2149 return ThisMBB;
2152 MachineBasicBlock *
2153 VETargetLowering::emitSjLjDispatchBlock(MachineInstr &MI,
2154 MachineBasicBlock *BB) const {
2155 DebugLoc DL = MI.getDebugLoc();
2156 MachineFunction *MF = BB->getParent();
2157 MachineFrameInfo &MFI = MF->getFrameInfo();
2158 MachineRegisterInfo &MRI = MF->getRegInfo();
2159 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2160 int FI = MFI.getFunctionContextIndex();
2162 // Get a mapping of the call site numbers to all of the landing pads they're
2163 // associated with.
2164 DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
2165 unsigned MaxCSNum = 0;
2166 for (auto &MBB : *MF) {
2167 if (!MBB.isEHPad())
2168 continue;
2170 MCSymbol *Sym = nullptr;
2171 for (const auto &MI : MBB) {
2172 if (MI.isDebugInstr())
2173 continue;
2175 assert(MI.isEHLabel() && "expected EH_LABEL");
2176 Sym = MI.getOperand(0).getMCSymbol();
2177 break;
2180 if (!MF->hasCallSiteLandingPad(Sym))
2181 continue;
2183 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2184 CallSiteNumToLPad[CSI].push_back(&MBB);
2185 MaxCSNum = std::max(MaxCSNum, CSI);
2189 // Get an ordered list of the machine basic blocks for the jump table.
2190 std::vector<MachineBasicBlock *> LPadList;
2191 SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
2192 LPadList.reserve(CallSiteNumToLPad.size());
2194 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2195 for (auto &LP : CallSiteNumToLPad[CSI]) {
2196 LPadList.push_back(LP);
2197 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2201 assert(!LPadList.empty() &&
2202 "No landing pad destinations for the dispatch jump table!");
2204 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2205 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2207 // This `[5 x i8*]` is the jmpbuf, so jmpbuf[1] is at FI+72.
2208 // The first `i64` is the call site, so the call site is at FI+8.
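// As a worked example of those offsets (with 8-byte pointers and alignment):
//   i8* at 0, i64 (callsite) at 8, [4 x i64] at 16, i8* at 48, i8* at 56,
//   [5 x i8*] (jmpbuf) at 64; hence jmpbuf[1] = 64 + 8 = 72.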
2209 static const int OffsetIC = 72;
2210 static const int OffsetCS = 8;
2212 // Create the MBBs for the dispatch code like the following:
2214 // ThisMBB:
2215 // Prepare DispatchBB address and store it to buf[1].
2216 // ...
2218 // DispatchBB:
2219 // %s15 = GETGOT iff isPositionIndependent
2220 // %callsite = load callsite
2221 // brgt.l.t #size of callsites, %callsite, DispContBB
2223 // TrapBB:
2224 // Call abort.
2226 // DispContBB:
2227 // %breg = address of jump table
2228 // %pc = load and calculate next pc from %breg and %callsite
2229 // jmp %pc
2231 // Shove the dispatch's address into the return slot in the function context.
2232 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2233 DispatchBB->setIsEHPad(true);
2235 // TrapBB causes a trap, like `assert(0)`.
2236 MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
2237 DispatchBB->addSuccessor(TrapBB);
2239 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2240 DispatchBB->addSuccessor(DispContBB);
2242 // Insert MBBs.
2243 MF->push_back(DispatchBB);
2244 MF->push_back(DispContBB);
2245 MF->push_back(TrapBB);
2247 // Insert code to call abort in the TrapBB.
2248 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2249 /* Local */ false, /* Call */ true);
2250 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2251 .addReg(Abort, getKillRegState(true))
2252 .addImm(0)
2253 .addImm(0);
2255 // Insert code into the entry block that creates and registers the function
2256 // context.
2257 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2259 // Create the jump table and associated information
2260 unsigned JTE = getJumpTableEncoding();
2261 MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
2262 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2264 const VERegisterInfo &RI = TII->getRegisterInfo();
2265 // Add a register mask with no preserved registers. This results in all
2266 // registers being marked as clobbered.
2267 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2268 .addRegMask(RI.getNoPreservedMask());
2270 if (isPositionIndependent()) {
2271 // Force generation of GETGOT, since the current implementation doesn't store
2272 // the GOT register.
2273 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2276 // IReg is used as an index in a memory operand and therefore can't be SP
2277 const TargetRegisterClass *RC = &VE::I64RegClass;
2278 Register IReg = MRI.createVirtualRegister(RC);
2279 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2280 OffsetCS);
2281 if (LPadList.size() < 64) {
2282 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2283 .addImm(VECC::CC_ILE)
2284 .addImm(LPadList.size())
2285 .addReg(IReg)
2286 .addMBB(TrapBB);
2287 } else {
2288 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2289 Register TmpReg = MRI.createVirtualRegister(RC);
2290 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2291 .addImm(0)
2292 .addImm(0)
2293 .addImm(LPadList.size());
2294 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2295 .addImm(VECC::CC_ILE)
2296 .addReg(TmpReg, getKillRegState(true))
2297 .addReg(IReg)
2298 .addMBB(TrapBB);
2301 Register BReg = MRI.createVirtualRegister(RC);
2302 Register Tmp1 = MRI.createVirtualRegister(RC);
2303 Register Tmp2 = MRI.createVirtualRegister(RC);
2305 if (isPositionIndependent()) {
2306 // Create the following instructions for local linkage PIC code.
2307 // lea %Tmp1, .LJTI0_0@gotoff_lo
2308 // and %Tmp2, %Tmp1, (32)0
2309 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2310 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2311 .addImm(0)
2312 .addImm(0)
2313 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_LO32);
2314 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2315 .addReg(Tmp1, getKillRegState(true))
2316 .addImm(M0(32));
2317 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2318 .addReg(VE::SX15)
2319 .addReg(Tmp2, getKillRegState(true))
2320 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_GOTOFF_HI32);
2321 } else {
2322 // Create the following instructions for non-PIC code.
2323 // lea %Tmp1, .LJTI0_0@lo
2324 // and %Tmp2, %Tmp1, (32)0
2325 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2326 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2327 .addImm(0)
2328 .addImm(0)
2329 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_LO32);
2330 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2331 .addReg(Tmp1, getKillRegState(true))
2332 .addImm(M0(32));
2333 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2334 .addReg(Tmp2, getKillRegState(true))
2335 .addImm(0)
2336 .addJumpTableIndex(MJTI, VEMCExpr::VK_VE_HI32);
2339 switch (JTE) {
2340 case MachineJumpTableInfo::EK_BlockAddress: {
2341 // Generate simple block address code for the non-PIC model.
2342 // sll %Tmp1, %IReg, 3
2343 // lds %TReg, 0(%Tmp1, %BReg)
2344 // bcfla %TReg
2346 Register TReg = MRI.createVirtualRegister(RC);
2347 Register Tmp1 = MRI.createVirtualRegister(RC);
2349 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2350 .addReg(IReg, getKillRegState(true))
2351 .addImm(3);
2352 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2353 .addReg(BReg, getKillRegState(true))
2354 .addReg(Tmp1, getKillRegState(true))
2355 .addImm(0);
2356 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2357 .addReg(TReg, getKillRegState(true))
2358 .addImm(0);
2359 break;
2361 case MachineJumpTableInfo::EK_Custom32: {
2362 // Generate block address code using differences from the function pointer
2363 // for the PIC model.
2364 // sll %Tmp1, %IReg, 2
2365 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2366 // Prepare function address in BReg2.
2367 // adds.l %TReg, %BReg2, %OReg
2368 // bcfla %TReg
2370 assert(isPositionIndependent());
2371 Register OReg = MRI.createVirtualRegister(RC);
2372 Register TReg = MRI.createVirtualRegister(RC);
2373 Register Tmp1 = MRI.createVirtualRegister(RC);
2375 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2376 .addReg(IReg, getKillRegState(true))
2377 .addImm(2);
2378 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2379 .addReg(BReg, getKillRegState(true))
2380 .addReg(Tmp1, getKillRegState(true))
2381 .addImm(0);
2382 Register BReg2 =
2383 prepareSymbol(*DispContBB, DispContBB->end(),
2384 DispContBB->getParent()->getName(), DL, /* Local */ true);
2385 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2386 .addReg(OReg, getKillRegState(true))
2387 .addReg(BReg2, getKillRegState(true));
2388 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2389 .addReg(TReg, getKillRegState(true))
2390 .addImm(0);
2391 break;
2393 default:
2394 llvm_unreachable("Unexpected jump table encoding");
2397 // Add the jump table entries as successors to the MBB.
2398 SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
2399 for (auto &LP : LPadList)
2400 if (SeenMBBs.insert(LP).second)
2401 DispContBB->addSuccessor(LP);
2403 // N.B. the order the invoke BBs are processed in doesn't matter here.
2404 SmallVector<MachineBasicBlock *, 64> MBBLPads;
2405 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2406 for (MachineBasicBlock *MBB : InvokeBBs) {
2407 // Remove the landing pad successor from the invoke block and replace it
2408 // with the new dispatch block.
2409 // Keep a copy of Successors since it's modified inside the loop.
2410 SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
2411 MBB->succ_rend());
2412 // FIXME: Avoid quadratic complexity.
2413 for (auto MBBS : Successors) {
2414 if (MBBS->isEHPad()) {
2415 MBB->removeSuccessor(MBBS);
2416 MBBLPads.push_back(MBBS);
2420 MBB->addSuccessor(DispatchBB);
2422 // Find the invoke call and mark all of the callee-saved registers as
2423 // 'implicit defined' so that they're spilled. This prevents code from
2424 // moving instructions to before the EH block, where they will never be
2425 // executed.
2426 for (auto &II : reverse(*MBB)) {
2427 if (!II.isCall())
2428 continue;
2430 DenseMap<Register, bool> DefRegs;
2431 for (auto &MOp : II.operands())
2432 if (MOp.isReg())
2433 DefRegs[MOp.getReg()] = true;
2435 MachineInstrBuilder MIB(*MF, &II);
2436 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2437 Register Reg = SavedRegs[RI];
2438 if (!DefRegs[Reg])
2439 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
2442 break;
2446 // Mark all former landing pads as non-landing pads. The dispatch is the only
2447 // landing pad now.
2448 for (auto &LP : MBBLPads)
2449 LP->setIsEHPad(false);
2451 // The instruction is gone now.
2452 MI.eraseFromParent();
2453 return BB;
2456 MachineBasicBlock *
2457 VETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
2458 MachineBasicBlock *BB) const {
2459 switch (MI.getOpcode()) {
2460 default:
2461 llvm_unreachable("Unknown Custom Instruction!");
2462 case VE::EH_SjLj_LongJmp:
2463 return emitEHSjLjLongJmp(MI, BB);
2464 case VE::EH_SjLj_SetJmp:
2465 return emitEHSjLjSetJmp(MI, BB);
2466 case VE::EH_SjLj_Setup_Dispatch:
2467 return emitSjLjDispatchBlock(MI, BB);
2471 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2472 switch (User->getOpcode()) {
2473 default:
2474 return false;
2475 case ISD::ADD:
2476 case ISD::SUB:
2477 case ISD::MUL:
2478 case ISD::SDIV:
2479 case ISD::UDIV:
2480 case ISD::SETCC:
2481 case ISD::SMIN:
2482 case ISD::SMAX:
2483 case ISD::SHL:
2484 case ISD::SRA:
2485 case ISD::BSWAP:
2486 case ISD::SINT_TO_FP:
2487 case ISD::UINT_TO_FP:
2488 case ISD::BR_CC:
2489 case ISD::BITCAST:
2490 case ISD::ATOMIC_CMP_SWAP:
2491 case ISD::ATOMIC_SWAP:
2492 return true;
2493 case ISD::SRL:
2494 if (N->getOperand(0).getOpcode() != ISD::SRL)
2495 return true;
2496 // (srl (trunc (srl ...))) may be optimized by combining the srls, so
2497 // don't optimize the trunc here.
2498 return false;
2499 case ISD::SELECT_CC:
2500 if (User->getOperand(2).getNode() != N &&
2501 User->getOperand(3).getNode() != N)
2502 return true;
2503 LLVM_FALLTHROUGH;
2504 case ISD::AND:
2505 case ISD::OR:
2506 case ISD::XOR:
2507 case ISD::SELECT:
2508 case ISD::CopyToReg:
2509 // Check all uses of selects, bit operations, and copies. If all of them
2510 // are safe, the truncate can be optimized to an extract_subreg.
2511 for (SDNode::use_iterator UI = User->use_begin(), UE = User->use_end();
2512 UI != UE; ++UI) {
2513 switch ((*UI)->getOpcode()) {
2514 default:
2515 // If the use is an instruction which treats the source operand as i32,
2516 // it is safe to avoid truncate here.
2517 if (isI32Insn(*UI, N))
2518 continue;
2519 break;
2520 case ISD::ANY_EXTEND:
2521 case ISD::SIGN_EXTEND:
2522 case ISD::ZERO_EXTEND: {
2523 // Special optimization for the combination of ext and trunc.
2524 // (ext ... (select ... (trunc ...))) is safe to avoid the truncate here
2525 // since this truncate instruction clears the upper 32 bits, which are filled
2526 // by one of the ext instructions later.
2527 assert(N->getValueType(0) == MVT::i32 &&
2528 "find truncate to not i32 integer");
2529 if (User->getOpcode() == ISD::SELECT_CC ||
2530 User->getOpcode() == ISD::SELECT)
2531 continue;
2532 break;
2535 return false;
2537 return true;
2541 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lowering is
2542 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2543 // is sometimes too late. So, we do it here instead.
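// For example, a (truncate i64 %x to i32) whose uses all pass the isI32Insn()
// check below is rewritten into an EXTRACT_SUBREG of %x with sub_i32, so no
// separate truncating instruction is emitted.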
2544 SDValue VETargetLowering::combineTRUNCATE(SDNode *N,
2545 DAGCombinerInfo &DCI) const {
2546 assert(N->getOpcode() == ISD::TRUNCATE &&
2547 "Should be called with a TRUNCATE node");
2549 SelectionDAG &DAG = DCI.DAG;
2550 SDLoc DL(N);
2551 EVT VT = N->getValueType(0);
2553 // We prefer to do this when all types are legal.
2554 if (!DCI.isAfterLegalizeDAG())
2555 return SDValue();
2557 // Skip combining TRUNCATE for now if the operand of TRUNCATE might be a constant.
2558 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
2559 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
2560 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
2561 return SDValue();
2563 // Check all uses of this TRUNCATE.
2564 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE;
2565 ++UI) {
2566 SDNode *User = *UI;
2568 // Make sure that we're not going to replace TRUNCATE for non-i32
2569 // instructions.
2571 // FIXME: Although we could sometimes handle this, and it does occur in
2572 // practice that one of the condition inputs to the select is also one of
2573 // the outputs, we currently can't deal with this.
2574 if (isI32Insn(User, N))
2575 continue;
2577 return SDValue();
2580 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2581 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
2582 N->getOperand(0), SubI32),
2586 SDValue VETargetLowering::PerformDAGCombine(SDNode *N,
2587 DAGCombinerInfo &DCI) const {
2588 switch (N->getOpcode()) {
2589 default:
2590 break;
2591 case ISD::TRUNCATE:
2592 return combineTRUNCATE(N, DCI);
2595 return SDValue();
2598 //===----------------------------------------------------------------------===//
2599 // VE Inline Assembly Support
2600 //===----------------------------------------------------------------------===//
2602 VETargetLowering::ConstraintType
2603 VETargetLowering::getConstraintType(StringRef Constraint) const {
2604 if (Constraint.size() == 1) {
2605 switch (Constraint[0]) {
2606 default:
2607 break;
2608 case 'v': // vector registers
2609 return C_RegisterClass;
2612 return TargetLowering::getConstraintType(Constraint);
2615 std::pair<unsigned, const TargetRegisterClass *>
2616 VETargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
2617 StringRef Constraint,
2618 MVT VT) const {
2619 const TargetRegisterClass *RC = nullptr;
2620 if (Constraint.size() == 1) {
2621 switch (Constraint[0]) {
2622 default:
2623 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2624 case 'r':
2625 RC = &VE::I64RegClass;
2626 break;
2627 case 'v':
2628 RC = &VE::V64RegClass;
2629 break;
2631 return std::make_pair(0U, RC);
2634 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
2637 //===----------------------------------------------------------------------===//
2638 // VE Target Optimization Support
2639 //===----------------------------------------------------------------------===//
2641 unsigned VETargetLowering::getMinimumJumpTableEntries() const {
2642 // Specify 8 for the PIC model to reduce the impact of PIC load instructions.
2643 if (isJumpTableRelative())
2644 return 8;
2646 return TargetLowering::getMinimumJumpTableEntries();
2649 bool VETargetLowering::hasAndNot(SDValue Y) const {
2650 EVT VT = Y.getValueType();
2652 // VE doesn't have a vector and-not instruction.
2653 if (VT.isVector())
2654 return false;
2656 // VE allows different immediate values for X and Y in ~X & Y.
2657 // Only simm7 works for X, and only mimm works for Y on VE. However, this
2658 // function is used to check whether an immediate value is OK for an and-not
2659 // instruction as both X and Y. Generating an additional instruction to
2660 // materialize an immediate value is no good, since the purpose of this
2661 // function is to convert a series of 3 instructions into another series of
2662 // 3 instructions with better parallelism. Therefore, we return false
2663 // for all immediate values for now.
2664 // FIXME: Change hasAndNot function to have two operands to make it work
2665 // correctly with Aurora VE.
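// Put differently, for an illustrative pattern like (and (xor %x, -1), C),
// this hook only sees the single value C and cannot tell whether C would be
// used as X (needing simm7) or as Y (needing mimm), so constants are
// conservatively rejected below.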
2666 if (isa<ConstantSDNode>(Y))
2667 return false;
2669 // It's ok for generic registers.
2670 return true;
2673 /// \returns the VVP_* SDNode opcode corresponding to \p Opcode.
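/// For example, assuming VVPNodes.def registers an ADD entry, both ISD::ADD
/// and ISD::VP_ADD map to VEISD::VVP_ADD here.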
2674 static Optional<unsigned> getVVPOpcode(unsigned Opcode) {
2675 switch (Opcode) {
2676 #define HANDLE_VP_TO_VVP(VPOPC, VVPNAME) \
2677 case ISD::VPOPC: \
2678 return VEISD::VVPNAME;
2679 #define ADD_VVP_OP(VVPNAME, SDNAME) \
2680 case VEISD::VVPNAME: \
2681 case ISD::SDNAME: \
2682 return VEISD::VVPNAME;
2683 #include "VVPNodes.def"
2685 return None;
2688 SDValue VETargetLowering::lowerToVVP(SDValue Op, SelectionDAG &DAG) const {
2689 // Can we represent this as a VVP node?
2690 const unsigned Opcode = Op->getOpcode();
2691 auto VVPOpcodeOpt = getVVPOpcode(Opcode);
2692 if (!VVPOpcodeOpt.hasValue())
2693 return SDValue();
2694 unsigned VVPOpcode = VVPOpcodeOpt.getValue();
2695 const bool FromVP = ISD::isVPOpcode(Opcode);
2697 // The representative and legalized vector type of this operation.
2698 SDLoc DL(Op);
2699 MVT MaskVT = MVT::v256i1; // TODO: packed mode.
2700 EVT OpVecVT = Op.getValueType();
2701 EVT LegalVecVT = getTypeToTransformTo(*DAG.getContext(), OpVecVT);
2703 SDValue AVL;
2704 SDValue Mask;
2706 if (FromVP) {
2707 // All upstream VP SDNodes always have a mask and an AVL.
2708 auto MaskIdx = ISD::getVPMaskIdx(Opcode).getValue();
2709 auto AVLIdx = ISD::getVPExplicitVectorLengthIdx(Opcode).getValue();
2710 Mask = Op->getOperand(MaskIdx);
2711 AVL = Op->getOperand(AVLIdx);
2713 } else {
2714 // Materialize the VL parameter.
2715 AVL = DAG.getConstant(OpVecVT.getVectorNumElements(), DL, MVT::i32);
2716 SDValue ConstTrue = DAG.getConstant(1, DL, MVT::i32);
2717 Mask = DAG.getNode(VEISD::VEC_BROADCAST, DL, MaskVT,
2718 ConstTrue); // emit a VEISD::VEC_BROADCAST here.
2721 // Categories we are interested in.
2722 bool IsBinaryOp = false;
2724 switch (VVPOpcode) {
2725 #define ADD_BINARY_VVP_OP(VVPNAME, ...) \
2726 case VEISD::VVPNAME: \
2727 IsBinaryOp = true; \
2728 break;
2729 #include "VVPNodes.def"
2732 if (IsBinaryOp) {
2733 assert(LegalVecVT.isSimple());
2734 return DAG.getNode(VVPOpcode, DL, LegalVecVT, Op->getOperand(0),
2735 Op->getOperand(1), Mask, AVL);
2737 llvm_unreachable("lowerToVVP called for unexpected SDNode.");
2740 SDValue VETargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
2741 SelectionDAG &DAG) const {
2742 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
2743 MVT VT = Op.getOperand(0).getSimpleValueType();
2745 // Special treatment for packed V64 types.
2746 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2747 (void)VT;
2748 // Example code:
2749 // %packed_v = extractelt %vr, %idx / 2
2750 // %v = %packed_v >> (%idx % 2 * 32)
2751 // %res = %v & 0xffffffff
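// For instance, %idx = 3 reads 64-bit element 1 of the vector register and,
// since the index is odd, keeps its low 32 bits (shift = 0); an even index
// would select the upper 32 bits instead (shift = 32).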
2753 SDValue Vec = Op.getOperand(0);
2754 SDValue Idx = Op.getOperand(1);
2755 SDLoc DL(Op);
2756 SDValue Result = Op;
2757 if (0 /* Idx->isConstant() */) {
2758 // TODO: optimized implementation using constant values
2759 } else {
2760 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2761 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2762 SDValue PackedElt =
2763 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2764 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2765 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2766 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2767 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2768 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
2769 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
2770 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2771 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
2772 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
2773 MVT::i32, PackedElt, SubI32),
2776 if (Op.getSimpleValueType() == MVT::f32) {
2777 Result = DAG.getBitcast(MVT::f32, Result);
2778 } else {
2779 assert(Op.getSimpleValueType() == MVT::i32);
2782 return Result;
2785 SDValue VETargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
2786 SelectionDAG &DAG) const {
2787 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
2788 MVT VT = Op.getOperand(0).getSimpleValueType();
2790 // Special treatment for packed V64 types.
2791 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
2792 (void)VT;
2793 // The v512i32 and v512f32 elements start from the upper bits (0..31). These
2794 // "upper bits" require a `val << 32` from a C implementation's point of view.
2796 // Example code:
2797 // %packed_elt = extractelt %vr, (%idx >> 1)
2798 // %shift = ((%idx & 1) ^ 1) << 5
2799 // %packed_elt &= 0xffffffff00000000 >> shift
2800 // %packed_elt |= (zext %val) << shift
2801 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
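// For instance, %idx = 2 places the value into the upper 32 bits of 64-bit
// element 1 (shift = 32), while %idx = 3 would place it into the lower 32
// bits of the same element (shift = 0).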
2803 SDLoc DL(Op);
2804 SDValue Vec = Op.getOperand(0);
2805 SDValue Val = Op.getOperand(1);
2806 SDValue Idx = Op.getOperand(2);
2807 if (Idx.getSimpleValueType() == MVT::i32)
2808 Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
2809 if (Val.getSimpleValueType() == MVT::f32)
2810 Val = DAG.getBitcast(MVT::i32, Val);
2811 assert(Val.getSimpleValueType() == MVT::i32);
2812 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
2814 SDValue Result = Op;
2815 if (0 /* Idx->isConstant()*/) {
2816 // TODO: optimized implementation using constant values
2817 } else {
2818 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
2819 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
2820 SDValue PackedElt =
2821 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
2822 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
2823 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
2824 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
2825 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
2826 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
2827 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
2828 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
2829 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
2830 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
2831 Result =
2832 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
2833 {HalfIdx, PackedElt, Vec}),
2836 return Result;