//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      report_fatal_error("getValueTypeMapEntry returns NULL for " +
                         Twine(VT.getEVTString()));
    }
#endif

    return retval;
  }
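
  // Illustration (for exposition only; the values come from valtype_map
  // above): the "preferred slot" is the byte offset at which the SPU keeps
  // a scalar inside a 128-bit register. For an i16, prefslot_byte == 2,
  // i.e. the scalar occupies bytes 2..3 of the quadword:
  //
  //   byte:  0  1 [2  3] 4  5  6  ...  15
  //                 i16
  //
  //   getValueTypeMapEntry(MVT::i16)->prefslot_byte == 2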

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}
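
// Usage sketch (illustrative only; this call does not appear verbatim in
// this file): expanding an f64 division through the fast library routine
// registered in the constructor below would look roughly like
//
//   SDValue Dummy;
//   SDValue Quot = ExpandLibCall(RTLIB::DIV_F64, Op, DAG,
//                                /*isSigned=*/false, Dummy, TLI);
//
// where Op is the FDIV node whose operands become the libcall arguments.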

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Tell the DAG combiner that division by a power of two is cheap on SPU,
  // so it is kept as a shift sequence rather than rewritten.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have CTLZ support for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ, MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ, MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ, MVT::i128, Expand);

  setOperationAction(ISD::CTLZ, MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ, MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ, MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ, MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ, MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // f64 FDIV on SPU has to be expanded to the libcall registered above:
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for i32 -> f32, but not for the i64 or f64 cases:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub/mul are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,    VT, Legal);
    setOperationAction(ISD::SUB,    VT, Legal);
    setOperationAction(ISD::MUL,    VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
          VT.getSimpleVT().SimpleTy :
          MVT::i32);
}
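
// For instance (illustrative): a SETCC comparing two i16 values yields an
// i16 all-ones/all-zeros mask, while an f64 comparison (or any type not
// listed above) yields an i32 mask, consistent with the
// ZeroOrNegativeOneBooleanContent setting in the constructor.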

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
 */
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getPointerInfo(),
                         LN->isVolatile(), LN->isNonTemporal(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
                         "than UNINDEXED\n" +
                         Twine((unsigned)LN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
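
// Worked example (illustrative): an aligned i32 load from basePtr+0x17,
// where basePtr is 16-byte aligned:
//
//   offset & 0xf       == 0x7
//   vtm->prefslot_byte == 0          (i32 prefers slot byte 0)
//   rotamt             == 0x7 - 0 == 7
//   offset & ~0xf      == 0x10       (folded into the base address)
//
// The quadword at basePtr+0x10 is loaded as v16i8, rotated left by 7 bytes
// so the requested word lands in the preferred slot, and then extracted
// with VEC2PREFSLOT.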

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;
      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned store: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Load the memory to which to store.
    alignLoadVec = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                               SN->getPointerInfo(),
                               SN->isVolatile(), SN->isNonTemporal(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getPointerInfo(),
                          LN->isVolatile(), LN->isNonTemporal(),
                          LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*NOTREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerSTORE: Got a StoreSDNode with an addr mode "
                         "other than UNINDEXED\n" +
                         Twine((unsigned)SN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
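
// Shape of the emitted DAG (illustrative): a scalar store becomes a
// read-modify-write of the containing quadword:
//
//   %chunk  v16i8,ch = load <16-byte block>
//   %mask   vecVT    = SHUFFLE_MASK <insertion offset>
//   %ins    vecVT    = scalar_to_vector %value
//   %merged vecVT    = SHUFB %ins, %chunk, %mask
//   ch               = store %merged, <16-byte block>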

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}
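
// Address-form sketch (illustrative): with static relocation and small
// memory, the pool entry is referenced directly as an A-form absolute
// address,
//
//   (SPUaform tconstpool:i32, 0)
//
// whereas large-memory mode emits (SPUindirect (SPUhi ...), (SPUlo ...)),
// which instruction selection turns into an ILHU/IOHL pair.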

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    report_fatal_error("LowerGlobalAddress: Relocation model other than "
                       "static not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    // tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // size of ArgRegs array
    unsigned NumArgRegs = 77;

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
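
// Example (illustrative): Addr = 0x3ffc passes both tests -- the low two
// bits are clear, and (0x3ffc << 14) >> 14 == 0x3ffc, i.e. the value is
// already the sign extension of an 18-bit quantity -- so the returned
// immediate is 0x3ffc >> 2 == 0xfff. Addr = 0x3ffe fails the low-bits
// test and is not a valid LSA address.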

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Emit all the stores of stack-passed arguments as one token factor.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, getTargetMachine(),
                    RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);

  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
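
/// For instance (illustrative): a v4i32 splat of 0x0002ffff is accepted
/// (0x2ffff <= 0x3ffff) and becomes a target constant that IL/ILA-style
/// immediates can match, while a splat of 0x00040000 is out of u18 range
/// and yields an empty SDValue, so another lowering form is tried.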

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isInt<10>(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
1538 //! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
                       Twine(VT.getEVTString()));
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   T, T, T, T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
//! Lower a splatted v2i64 constant that is not a simple replicated immediate
SDValue
SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }
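    // Worked example, assuming SHUFB's usual control-byte encoding
    // (0b10xxxxxx -> 0x00, 0b110xxxxx -> 0xff, 0b111xxxxx -> 0x80):
    // for SplatVal = 0x0000000012345678, upper is the special all-zero
    // pattern and lower is not, so LO32 splats 0x12345678, HI32 aliases
    // LO32, and the mask comes out { 0x80808080, 0x14151617, 0x80808080,
    // 0x1c1d1e1f }: zero-fill the even (upper) words, pull the odd (lower)
    // words from the second SHUFB operand.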
    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
/// \note
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as the Cell's <i>shufb</i>
/// instruction.
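///
/// For illustration: for v4i32, the mask (0, 5, 2, 3) keeps V1's elements in
/// slots 0, 2 and 3 and moves element 1 of V2 (mask index 5 = 4 + 1) into
/// slot 1; the source and destination slots agree, so the single-insertion
/// path below applies.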
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element, and the source
  // slot of the element to move must be the same as the destination.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2EltOffset = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  bool monotonic = true;
  bool rotate = true;
  int rotamt = 0;
  EVT maskVT;             // which of the c?d instructions to use

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
    maskVT = MVT::v16i8;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
    maskVT = MVT::v8i16;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
    maskVT = MVT::v4i32;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
    maskVT = MVT::v2i64;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // TODO: optimize for the monotonic case when several consecutive
        // elements are taken from V2. Do we ever get such a case?
        if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
          V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
        else
          monotonic = false;
        ++EltsFromV2;
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }
      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          rotamt = SrcElt - i;
          PrevElt = SrcElt;
        } else {
          rotate = false;
        }
      } else if (i == 0 || (PrevElt == 0 && SrcElt == 1)) {
        // First time or after a "wrap around"
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation; it takes elements from vector 2
        rotate = false;
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
    // R1 ($sp) is used here only because its low bits are guaranteed zero
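    // (Only the low four bits of the resulting address influence the
    // generated insertion mask, so any 16-byte-aligned base would serve
    // equally well.)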
    SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  DAG.getRegister(SPU::R1, PtrVT),
                                  DAG.getConstant(V2EltOffset, MVT::i32));
    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
                                     maskVT, Pointer);

    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    if (rotamt < 0)
      rotamt += MaxElts;
    rotamt *= EltVT.getSizeInBits()/8;
    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
    // Convert the SHUFFLE_VECTOR mask's input element units to the
    // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2,
                       VPermMask);
  }
}
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    EVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT().SimpleTy) {
    default:
      llvm_unreachable("Unexpected constant value type in "
                       "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8;  break;
    case MVT::v8i16: n_copies = 8;  VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4;  VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4;  VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2;  VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2;  VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
    default:
      llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }
    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector()) {
      report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, "
                         "128-bit vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled "
                         "vector type");
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  EVT eltVT = ValOp.getValueType();

  // use 0 when the lane to insert to is 'undef'
  int64_t Offset = 0;
  if (IdxOp.getOpcode() != ISD::UNDEF) {
    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
    Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
  }

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(Offset, PtrVT));
  // widen the mask when dealing with half vectors
  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
                                128 / VT.getVectorElementType().getSizeInBits());
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
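    // The doubled value is periodic in 8 bits, so a 16-bit rotate leaves the
    // 8-bit rotate of the original in the low byte: e.g. 0xab doubles to
    // 0xabab; rotl by 4 gives 0xbaba, whose low byte 0xba == rotl8(0xab, 4).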
    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  EVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
//! Custom lowering for CTPOP (count population)
/*!
 Custom lowering code that counts the number of ones in the input
 operand. SPU has such an instruction, but it counts the number of
 ones per byte, which then have to be accumulated.
 */
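//
// For illustration: an i32 input of 0x01010101 produces the per-byte counts
// 0x01010101 from CNTB; the i32 path's shift-and-add reduction (>> 16, add,
// >> 8, add) then folds all four byte counts into the low byte, and the
// final mask with 0xff yields 4.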
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
 All conversions to i64 are expanded to a libcall.
 */
static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
 All conversions from i64 are expanded to a libcall.
 */
static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
                              const SPUTargetLowering &TLI) {
  EVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering.
 */
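//
// The trick used below: IEEE-754 doubles compare correctly as sign-magnitude
// integers, so each operand is bitcast to i64 and, when negative, mapped
// through (0x8000000000000000 - bits) to obtain a two's-complement ordering
// (note that -0.0 maps to 0, the same as +0.0). The ordered predicates then
// AND the result with a both-operands-ordered test; SETO and SETUO are
// handled separately up front.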
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  EVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
  EVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETUNE:
  case ISD::SETONE:
    compareOp = ISD::SETNE; break;
  default:
    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
//! Lower ISD::SELECT_CC
/*!
 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
 SELB instruction.

 \note Need to revisit this in the future: if the code path through the true
 and false value computations is longer than the latency of a branch (6
 cycles), then it would be more advantageous to branch and insert a new basic
 block and branch on the condition. However, this code does not make that
 assumption, given the simplistic uses so far.
 */
static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
                              const TargetLowering &TLI) {
  EVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.
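  //
  // For example, when the comparison holds, the SETCC mask is all ones and
  // SELB yields its $rB operand; passing (falseval, trueval) below therefore
  // selects trueval exactly when the condition is true.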
  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
//! Custom lower ISD::TRUNCATE
static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  EVT VT = Op.getValueType();
  MVT simpleVT = VT.getSimpleVT();
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
                               VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  EVT Op0VT = Op0.getValueType();

  if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
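    // In the SPU's big-endian register layout, bytes 8-15 of the i128 hold
    // its least-significant doubleword; the two mask words above select
    // exactly those bytes and replicate them, landing the result in the
    // i64 preferred slot.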
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();                   // Leave the truncate unmolested
}
/*!
 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
 * algorithm is to duplicate the sign bit using rotmai to generate at
 * least one byte full of sign bits. Then propagate the "sign-byte" into
 * the leftmost words and the i64/i32 into the rightmost words using shufb.
 *
 * @param Op The sext operand
 * @param DAG The current DAG
 * @return The SDValue with the entire instruction sequence
 */
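//
// For illustration, for an i64 operand: the word-wise SRA below leaves the
// sign byte in byte 0 of its result, and the mask { 0x10101010, 0x10101010,
// 0x00010203, 0x04050607 } assembles the i128 from byte 16 of the SHUFB
// inputs (the sign byte of the second operand) for the high eight bytes
// and bytes 0-7 of the first operand (the original value) for the low
// eight.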
static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
{
  DebugLoc dl = Op.getDebugLoc();

  // Type to extend to
  MVT OpVT = Op.getValueType().getSimpleVT();

  // Type to extend from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType().getSimpleVT();

  // The type to extend to needs to be a i128 and
  // the type to extend from needs to be i64 or i32.
  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
         "LowerSIGN_EXTEND: input and/or output operand have wrong size");

  // Create shuffle mask
  unsigned mask1 = 0x10101010;                            // byte 0 - 3 and 4 - 7
  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask1, MVT::i32),
                                 DAG.getConstant(mask2, MVT::i32),
                                 DAG.getConstant(mask3, MVT::i32));

  // Word wise arithmetic right shift to generate at least one byte
  // that contains sign bits.
  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
  SDValue sraVal = DAG.getNode(ISD::SRA,
                               dl,
                               mvt,
                               DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt,
                                           Op0, Op0),
                               DAG.getConstant(31, MVT::i32));

  // Reinterpret as an i128 (SHUFB requires it). This gets lowered away.
  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                                dl, Op0VT, Op0,
                                                DAG.getTargetConstant(
                                                  SPU::GPRCRegClass.getID(),
                                                  MVT::i32)), 0);
  // Shuffle bytes - Copy the sign bits into the upper 64 bits
  // and the input value into the lower 64 bits.
  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
                                   extended, sraVal, shufMask);
  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
}
//! Custom (target-specific) lowering entry point
/*!
 This is where LLVM's DAG selection process calls to do target-specific
 lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  unsigned Opc = (unsigned) Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  default: {
#ifndef NDEBUG
    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    Op.getNode()->dump();
#endif
    llvm_unreachable(0);
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);

  case ISD::SIGN_EXTEND:
    return LowerSIGN_EXTEND(Op, DAG);
  }

  return SDValue();
}
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                   << "), " << CN0->getSExtValue() << ")\n"
                   << "With:    (SPUindirect <arg>, "
                   << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                   << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                   << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
SPUTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
    // FIXME: Seems like the supported constraint letters were just copied
    // from PPC, as the following doesn't correspond to the GCC docs.
    // I'm leaving it so until someone adds the corresponding lowering support.
  case 'b':
  case 'r':
  case 'f':
  case 'd':
  case 'v':
  case 'y':
    weight = CW_Register;
    break;
  }
  return weight;
}
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}
unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
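    // A SETCC on SPU materializes as all-zeros or all-ones, so every bit of
    // the result is a copy of the sign bit.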
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K (18 bits):
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}

// Can we compare to Imm without writing it into a register?
bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  // ceqi, cgti, etc. all take an s10 operand.
  return isInt<10>(Imm);
}

bool
SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                         const Type *) const {
  // A-form: 18-bit absolute address.
  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
    return true;

  // D-form: reg + 14-bit offset
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
    return true;

  // X-form: reg + reg
  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs == 0)
    return true;

  return false;
}