//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License.  See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
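
  // For illustration: the "preferred slot" is the byte offset within a
  // 16-byte SPU register where scalar operations expect a value to sit.
  // An i32 sits in bytes 0..3 (prefslot_byte 0), while an i16 sits in
  // bytes 2..3 (prefslot_byte 2), so the load lowering below rotates the
  // containing quadword until the scalar lands in its preferred slot.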

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Division by a power of two is cheap on SPU, so fold it where possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
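
  // For example, f64 FDIV is marked Expand below ("to libcall"); with
  // DIV_F64 overridden here, the legalizer emits a call to __fast_divdf3
  // instead of the default __divdf3.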

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // handled during instruction selection:
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP, but it does have i32 support for CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);

  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);

  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
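
  // For illustration, expanding sign_extend_inreg(x, i1) on an i32 value
  // yields (sra (shl x, 31), 31).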

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
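
  // For illustration, the expansion of an i64 BUILD_PAIR(lo, hi) is roughly
  // (or (shl (anyext hi), 32), (zext lo)).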

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // mul is also legal; it is matched during instruction selection.
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32     = vec2prefslot %3
%5  f64     = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
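
    // Worked example (illustrative): a 16-byte aligned i32 load from
    // basePtr+0x1c reads the quadword at basePtr+0x10; (0x1c & 0xf) = 0xc
    // and the i32 preferred slot byte is 0, so the quadword is rotated left
    // by 12 bytes to bring the word into bytes 0..3.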

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned store: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
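
// For illustration: Addr = 0x1000 passes both checks above (it is 4-byte
// aligned, and its top 14 bits are the sign extension of the remaining 18),
// so it is encoded as the immediate 0x1000 >> 2 = 0x400.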

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
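
// For illustration: (build_vector 5, undef, 5, 5) yields the ConstantSDNode
// for 5, while (build_vector 5, 6, 5, 5) yields 0 because the non-undef
// elements differ.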

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
1569 SDValue
1570 SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1571 DebugLoc dl) {
1572 uint32_t upper = uint32_t(SplatVal >> 32);
1573 uint32_t lower = uint32_t(SplatVal);
1575 if (upper == lower) {
1576 // Magic constant that can be matched by IL, ILA, et. al.
1577 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1578 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1579 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1580 Val, Val, Val, Val));
1581 } else {
1582 bool upper_special, lower_special;
1584 // NOTE: This code creates common-case shuffle masks that can be easily
1585 // detected as common expressions. It is not attempting to create highly
1586 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1588 // Detect if the upper or lower half is a special shuffle mask pattern:
1589 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1590 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1592 // Both upper and lower are special, lower to a constant pool load:
1593 if (lower_special && upper_special) {
1594 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1595 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1596 SplatValCN, SplatValCN);
1599 SDValue LO32;
1600 SDValue HI32;
1601 SmallVector<SDValue, 16> ShufBytes;
1602 SDValue Result;
1604 // Create lower vector if not a special pattern
1605 if (!lower_special) {
1606 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1607 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1608 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1609 LO32C, LO32C, LO32C, LO32C));
1612 // Create upper vector if not a special pattern
1613 if (!upper_special) {
1614 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1615 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1616 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1617 HI32C, HI32C, HI32C, HI32C));
1620 // If either upper or lower is special, then the two input operands are
1621 // the same (basically, one of them is a "don't care")
1622 if (lower_special)
1623 LO32 = HI32;
1624 if (upper_special)
1625 HI32 = LO32;
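// Worked example (illustrative): for SplatVal = 0x0000000012345678ULL,
// upper == 0 is special and lower == 0x12345678 is not, so LO32 is built
// and HI32 ends up aliasing it. The loop below then emits the shuffle
// words { 0x80808080, 0x14151617, 0x80808080, 0x1c1d1e1f }: the 0x80
// selector bytes generate zeros, while selectors 0x14-0x17 and 0x1c-0x1f
// pull the four bytes of 'lower' from the second SHUFB operand.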
1627 for (int i = 0; i < 4; ++i) {
1628 uint64_t val = 0;
1629 for (int j = 0; j < 4; ++j) {
1630 SDValue V;
1631 bool process_upper, process_lower;
1632 val <<= 8;
1633 process_upper = (upper_special && (i & 1) == 0);
1634 process_lower = (lower_special && (i & 1) == 1);
1636 if (process_upper || process_lower) {
1637 if ((process_upper && upper == 0)
1638 || (process_lower && lower == 0))
1639 val |= 0x80;
1640 else if ((process_upper && upper == 0xffffffff)
1641 || (process_lower && lower == 0xffffffff))
1642 val |= 0xc0;
1643 else if ((process_upper && upper == 0x80000000)
1644 || (process_lower && lower == 0x80000000))
1645 val |= (j == 0 ? 0xe0 : 0x80);
1646 } else
1647 val |= i * 4 + j + ((i & 1) * 16);
1650 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1653 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1654 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1655 &ShufBytes[0], ShufBytes.size()));
1659 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1660 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1661 /// permutation vector is monotonically increasing with one "exception"
1662 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1663 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1664 /// In either case, the net result is going to eventually invoke SHUFB to
1665 /// permute/shuffle the bytes from V1 and V2.
1666 /// \note
1667 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1668 /// generate a control word for byte/halfword/word insertion. This takes care
1669 /// of a single-element move from V2 into V1.
1670 /// \note
1671 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
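/// \note
/// Example (illustrative): for a v4i32 shuffle, the mask (0, 1, 6, 3) is
/// monotonic with a single element (index 6, i.e., V2's element 2) taken
/// from V2, so it maps onto the SHUFFLE_MASK path below; the mask
/// (3, 2, 1, 0) is neither monotonic nor a rotation and falls through to
/// the general byte-level shuffle mask.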
1672 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1673 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1674 SDValue V1 = Op.getOperand(0);
1675 SDValue V2 = Op.getOperand(1);
1676 DebugLoc dl = Op.getDebugLoc();
1678 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1680 // If we have a single element being moved from V2 to V1, this can be handled
1681 // using the C*[DX] compute mask instructions, but the vector elements have
1682 // to be monotonically increasing with one exception element.
1683 MVT VecVT = V1.getValueType();
1684 MVT EltVT = VecVT.getVectorElementType();
1685 unsigned EltsFromV2 = 0;
1686 unsigned V2Elt = 0;
1687 unsigned V2EltIdx0 = 0;
1688 unsigned CurrElt = 0;
1689 unsigned MaxElts = VecVT.getVectorNumElements();
1690 unsigned PrevElt = 0;
1691 unsigned V0Elt = 0;
1692 bool monotonic = true;
1693 bool rotate = true;
1695 if (EltVT == MVT::i8) {
1696 V2EltIdx0 = 16;
1697 } else if (EltVT == MVT::i16) {
1698 V2EltIdx0 = 8;
1699 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1700 V2EltIdx0 = 4;
1701 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1702 V2EltIdx0 = 2;
1703 } else
1704 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1706 for (unsigned i = 0; i != MaxElts; ++i) {
1707 if (SVN->getMaskElt(i) < 0)
1708 continue;
1710 unsigned SrcElt = SVN->getMaskElt(i);
1712 if (monotonic) {
1713 if (SrcElt >= V2EltIdx0) {
1714 if (++EltsFromV2 <= 1) {
1715 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1717 } else if (CurrElt != SrcElt) {
1718 monotonic = false;
1721 ++CurrElt;
1724 if (rotate) {
1725 if (PrevElt > 0 && SrcElt < MaxElts) {
1726 if ((PrevElt == SrcElt - 1)
1727 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1728 PrevElt = SrcElt;
1729 if (SrcElt == 0)
1730 V0Elt = i;
1731 } else {
1732 rotate = false;
1734 } else if (PrevElt == 0) {
1735 // First time through, need to keep track of previous element
1736 PrevElt = SrcElt;
1737 } else {
1738 // This isn't a rotation; it takes elements from vector 2
1739 rotate = false;
1744 if (EltsFromV2 == 1 && monotonic) {
1745 // Compute mask and shuffle
1746 MachineFunction &MF = DAG.getMachineFunction();
1747 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1748 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1749 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1750 // Initialize temporary register to 0
1751 SDValue InitTempReg =
1752 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1753 // Copy register's contents as index in SHUFFLE_MASK:
1754 SDValue ShufMaskOp =
1755 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1756 DAG.getTargetConstant(V2Elt, MVT::i32),
1757 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1758 // Use shuffle mask in SHUFB synthetic instruction:
1759 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1760 ShufMaskOp);
1761 } else if (rotate) {
1762 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1764 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1765 V1, DAG.getConstant(rotamt, MVT::i16));
1766 } else {
1767 // Convert the VECTOR_SHUFFLE mask's input element units to the
1768 // actual bytes.
1769 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
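// e.g. (illustrative), for v4i32, mask element 5 expands to the byte
// selectors 20, 21, 22 and 23, addressing the second word of V2 in the
// concatenated (V1, V2) byte space that SHUFB sees.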
1771 SmallVector<SDValue, 16> ResultMask;
1772 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1773 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1775 for (unsigned j = 0; j < BytesPerElement; ++j)
1776 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1779 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1780 &ResultMask[0], ResultMask.size());
1781 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1785 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1786 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1787 DebugLoc dl = Op.getDebugLoc();
1789 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1790 // For a constant, build the appropriate constant vector, which will
1791 // eventually simplify to a vector register load.
1793 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1794 SmallVector<SDValue, 16> ConstVecValues;
1795 MVT VT;
1796 size_t n_copies;
1798 // Create a constant vector:
1799 switch (Op.getValueType().getSimpleVT()) {
1800 default: assert(0 && "Unexpected constant value type in "
1801 "LowerSCALAR_TO_VECTOR");
1802 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1803 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1804 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1805 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1806 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1807 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1810 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1811 for (size_t j = 0; j < n_copies; ++j)
1812 ConstVecValues.push_back(CValue);
1814 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1815 &ConstVecValues[0], ConstVecValues.size());
1816 } else {
1817 // Otherwise, copy the value from one register to another:
1818 switch (Op0.getValueType().getSimpleVT()) {
1819 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1820 case MVT::i8:
1821 case MVT::i16:
1822 case MVT::i32:
1823 case MVT::i64:
1824 case MVT::f32:
1825 case MVT::f64:
1826 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1830 return SDValue();
1833 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1834 MVT VT = Op.getValueType();
1835 SDValue N = Op.getOperand(0);
1836 SDValue Elt = Op.getOperand(1);
1837 DebugLoc dl = Op.getDebugLoc();
1838 SDValue retval;
1840 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1841 // Constant argument:
1842 int EltNo = (int) C->getZExtValue();
1844 // sanity checks:
1845 if (VT == MVT::i8 && EltNo >= 16)
1846 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1847 else if (VT == MVT::i16 && EltNo >= 8)
1848 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1849 else if (VT == MVT::i32 && EltNo >= 4)
1850 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1851 else if (VT == MVT::i64 && EltNo >= 2)
1852 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1854 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1855 // i32 and i64: Element 0 is the preferred slot
1856 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1859 // Need to generate shuffle mask and extract:
1860 int prefslot_begin = -1, prefslot_end = -1;
1861 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1863 switch (VT.getSimpleVT()) {
1864 default:
1865 assert(false && "Invalid value type!");
1866 case MVT::i8: {
1867 prefslot_begin = prefslot_end = 3;
1868 break;
1870 case MVT::i16: {
1871 prefslot_begin = 2; prefslot_end = 3;
1872 break;
1874 case MVT::i32:
1875 case MVT::f32: {
1876 prefslot_begin = 0; prefslot_end = 3;
1877 break;
1879 case MVT::i64:
1880 case MVT::f64: {
1881 prefslot_begin = 0; prefslot_end = 7;
1882 break;
1886 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1887 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1889 unsigned int ShufBytes[16];
1890 for (int i = 0; i < 16; ++i) {
1891 // zero fill upper part of preferred slot, don't care about the
1892 // other slots:
1893 unsigned int mask_val;
1894 if (i <= prefslot_end) {
1895 mask_val =
1896 ((i < prefslot_begin)
1897 ? 0x80
1898 : elt_byte + (i - prefslot_begin));
1900 ShufBytes[i] = mask_val;
1901 } else
1902 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
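// Worked example (illustrative): extracting i16 element 5 gives
// elt_byte == 10 with preferred slot bytes 2-3, so the mask becomes
// { 0x80, 0x80, 0x0a, 0x0b } repeated across the quadword: zeros above
// the preferred slot, the element's two bytes within it.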
1905 SDValue ShufMask[4];
1906 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1907 unsigned bidx = i * 4;
1908 unsigned int bits = ((ShufBytes[bidx] << 24) |
1909 (ShufBytes[bidx+1] << 16) |
1910 (ShufBytes[bidx+2] << 8) |
1911 ShufBytes[bidx+3]);
1912 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1915 SDValue ShufMaskVec =
1916 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1917 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1919 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1920 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1921 N, N, ShufMaskVec));
1922 } else {
1923 // Variable index: Rotate the requested element into slot 0, then replicate
1924 // slot 0 across the vector
1925 MVT VecVT = N.getValueType();
1926 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1927 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1928 abort();
1931 // Make life easier by making sure the index is zero-extended to i32
1932 if (Elt.getValueType() != MVT::i32)
1933 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1935 // Scale the index to a bit/byte shift quantity
1936 APInt scaleFactor =
1937 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1938 unsigned scaleShift = scaleFactor.logBase2();
1939 SDValue vecShift;
1941 if (scaleShift > 0) {
1942 // Scale the shift factor:
1943 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
1944 DAG.getConstant(scaleShift, MVT::i32));
1947 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
1949 // Replicate the bytes starting at byte 0 across the entire vector (for
1950 // consistency with the notion of a unified register set)
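// Worked example (illustrative): to extract v8i16 element 5, the index is
// scaled by 16/8 == 2 (scaleShift == 1), giving a 10-byte shift that moves
// bytes 10-11 down to bytes 0-1; the i16 replication mask 0x00010001 below
// then copies those two bytes across the quadword.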
1951 SDValue replicate;
1953 switch (VT.getSimpleVT()) {
1954 default:
1955 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
1956 abort();
1957 /*NOTREACHED*/
1958 case MVT::i8: {
1959 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
1960 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1961 factor, factor, factor, factor);
1962 break;
1964 case MVT::i16: {
1965 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
1966 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1967 factor, factor, factor, factor);
1968 break;
1970 case MVT::i32:
1971 case MVT::f32: {
1972 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
1973 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1974 factor, factor, factor, factor);
1975 break;
1977 case MVT::i64:
1978 case MVT::f64: {
1979 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
1980 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
1981 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1982 loFactor, hiFactor, loFactor, hiFactor);
1983 break;
1987 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1988 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
1989 vecShift, vecShift, replicate));
1992 return retval;
1995 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1996 SDValue VecOp = Op.getOperand(0);
1997 SDValue ValOp = Op.getOperand(1);
1998 SDValue IdxOp = Op.getOperand(2);
1999 DebugLoc dl = Op.getDebugLoc();
2000 MVT VT = Op.getValueType();
2002 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2003 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2005 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2006 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2007 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2008 DAG.getRegister(SPU::R1, PtrVT),
2009 DAG.getConstant(CN->getSExtValue(), PtrVT));
2010 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
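// Sketch of what this builds: SHUFFLE_MASK selects down to one of the C*D
// instructions, which (as noted in LowerVECTOR_SHUFFLE) derive an insertion
// control word from the low bits of an address; $sp is used here only
// because it is guaranteed 16-byte aligned, so the added constant alone
// determines those low bits.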
2012 SDValue result =
2013 DAG.getNode(SPUISD::SHUFB, dl, VT,
2014 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2015 VecOp,
2016 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2018 return result;
2021 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2022 const TargetLowering &TLI)
2024 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2025 DebugLoc dl = Op.getDebugLoc();
2026 MVT ShiftVT = TLI.getShiftAmountTy();
2028 assert(Op.getValueType() == MVT::i8);
2029 switch (Opc) {
2030 default:
2031 assert(0 && "Unhandled i8 math operator");
2032 /*NOTREACHED*/
2033 break;
2034 case ISD::ADD: {
2035 // 8-bit addition: Promote the arguments up to 16 bits and truncate
2036 // the result:
2037 SDValue N1 = Op.getOperand(1);
2038 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2039 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2040 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2041 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2045 case ISD::SUB: {
2046 // 8-bit subtraction: Promote the arguments up to 16 bits and truncate
2047 // the result:
2048 SDValue N1 = Op.getOperand(1);
2049 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2050 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2051 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2052 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2054 case ISD::ROTR:
2055 case ISD::ROTL: {
2056 SDValue N1 = Op.getOperand(1);
2057 MVT N1VT = N1.getValueType();
2059 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2060 if (!N1VT.bitsEq(ShiftVT)) {
2061 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2062 ? ISD::ZERO_EXTEND
2063 : ISD::TRUNCATE;
2064 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2067 // Replicate lower 8 bits into upper 8:
2068 SDValue ExpandArg =
2069 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2070 DAG.getNode(ISD::SHL, dl, MVT::i16,
2071 N0, DAG.getConstant(8, MVT::i32)));
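// e.g. (illustrative), for N0 = 0xab: ExpandArg = 0xabab, and a 16-bit
// ROTL by 3 yields 0x5d5d, whose low byte 0x5d is exactly 0xab rotated
// left by 3; doubling the pattern makes the 16-bit rotate wrap correctly.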
2073 // Truncate back down to i8
2074 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2075 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2077 case ISD::SRL:
2078 case ISD::SHL: {
2079 SDValue N1 = Op.getOperand(1);
2080 MVT N1VT = N1.getValueType();
2082 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2083 if (!N1VT.bitsEq(ShiftVT)) {
2084 unsigned N1Opc = ISD::ZERO_EXTEND;
2086 if (N1.getValueType().bitsGT(ShiftVT))
2087 N1Opc = ISD::TRUNCATE;
2089 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2092 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2093 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2095 case ISD::SRA: {
2096 SDValue N1 = Op.getOperand(1);
2097 MVT N1VT = N1.getValueType();
2099 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2100 if (!N1VT.bitsEq(ShiftVT)) {
2101 unsigned N1Opc = ISD::SIGN_EXTEND;
2103 if (N1VT.bitsGT(ShiftVT))
2104 N1Opc = ISD::TRUNCATE;
2105 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2108 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2109 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2111 case ISD::MUL: {
2112 SDValue N1 = Op.getOperand(1);
2114 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2115 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2116 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2117 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2118 break;
2122 return SDValue();
2125 //! Lower byte immediate operations for v16i8 vectors:
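//!
//! If one operand is a splatted byte constant, the node is rebuilt with a
//! v16i8 target-constant splat so that instruction selection can
//! (presumably) form the immediate ANDBI/ORBI/XORBI encodings instead of
//! materializing the constant vector.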
2126 static SDValue
2127 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2128 SDValue ConstVec;
2129 SDValue Arg;
2130 MVT VT = Op.getValueType();
2131 DebugLoc dl = Op.getDebugLoc();
2133 ConstVec = Op.getOperand(0);
2134 Arg = Op.getOperand(1);
2135 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2136 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2137 ConstVec = ConstVec.getOperand(0);
2138 } else {
2139 ConstVec = Op.getOperand(1);
2140 Arg = Op.getOperand(0);
2141 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2142 ConstVec = ConstVec.getOperand(0);
2147 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2148 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2149 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2151 APInt APSplatBits, APSplatUndef;
2152 unsigned SplatBitSize;
2153 bool HasAnyUndefs;
2154 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2156 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2157 HasAnyUndefs, minSplatBits)
2158 && minSplatBits <= SplatBitSize) {
2159 uint64_t SplatBits = APSplatBits.getZExtValue();
2160 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2162 SmallVector<SDValue, 16> tcVec;
2163 tcVec.assign(16, tc);
2164 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2165 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2169 // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2170 // lowered. Return the operation rather than a null SDValue.
2171 return Op;
2174 //! Custom lowering for CTPOP (count population)
2176 Custom lowering code that counts the number of ones in the input
2177 operand. SPU has such an instruction, but it counts the number of
2178 ones per byte, which then have to be accumulated.
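Worked example (illustrative), following the i16 path below: for
x = 0xf00f, CNTB produces the per-byte counts packed as 0x0404, and
((0x0404 >> 8) + 0x0404) & 0x000f == 8, the population count of x.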
2180 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2181 MVT VT = Op.getValueType();
2182 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2183 DebugLoc dl = Op.getDebugLoc();
2185 switch (VT.getSimpleVT()) {
2186 default:
2187 assert(false && "Invalid value type!");
2188 case MVT::i8: {
2189 SDValue N = Op.getOperand(0);
2190 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2192 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2193 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2195 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2198 case MVT::i16: {
2199 MachineFunction &MF = DAG.getMachineFunction();
2200 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2202 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2204 SDValue N = Op.getOperand(0);
2205 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2206 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2207 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2209 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2210 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2212 // CNTB_result becomes the chain to which the virtual register
2213 // CNTB_reg becomes associated:
2214 SDValue CNTB_result =
2215 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2217 SDValue CNTB_rescopy =
2218 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2220 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2222 return DAG.getNode(ISD::AND, dl, MVT::i16,
2223 DAG.getNode(ISD::ADD, dl, MVT::i16,
2224 DAG.getNode(ISD::SRL, dl, MVT::i16,
2225 Tmp1, Shift1),
2226 Tmp1),
2227 Mask0);
2230 case MVT::i32: {
2231 MachineFunction &MF = DAG.getMachineFunction();
2232 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2234 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2235 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2237 SDValue N = Op.getOperand(0);
2238 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2239 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2240 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2241 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2243 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2244 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2246 // CNTB_result becomes the chain to which all of the virtual registers
2247 // CNTB_reg and SUM1_reg become associated:
2248 SDValue CNTB_result =
2249 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2251 SDValue CNTB_rescopy =
2252 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2254 SDValue Comp1 =
2255 DAG.getNode(ISD::SRL, dl, MVT::i32,
2256 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2257 Shift1);
2259 SDValue Sum1 =
2260 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2261 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2263 SDValue Sum1_rescopy =
2264 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2266 SDValue Comp2 =
2267 DAG.getNode(ISD::SRL, dl, MVT::i32,
2268 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2269 Shift2);
2270 SDValue Sum2 =
2271 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2272 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2274 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2277 case MVT::i64:
2278 break;
2281 return SDValue();
2284 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2286 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2287 All conversions to i64 are expanded to a libcall.
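For example, an f64 -> i32 fp_to_sint typically becomes a call to
__fixdfsi (the exact symbol comes from the RTLIB tables).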
2289 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2290 SPUTargetLowering &TLI) {
2291 MVT OpVT = Op.getValueType();
2292 SDValue Op0 = Op.getOperand(0);
2293 MVT Op0VT = Op0.getValueType();
2295 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2296 || OpVT == MVT::i64) {
2297 // Convert f32 / f64 to i32 / i64 via libcall.
2298 RTLIB::Libcall LC =
2299 (Op.getOpcode() == ISD::FP_TO_SINT)
2300 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2301 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2302 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2303 SDValue Dummy;
2304 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2307 return SDValue();
2310 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2312 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2313 All conversions from i64 are expanded to a libcall.
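For example, an i32 -> f64 sint_to_fp typically becomes a call to
__floatsidf (again, the exact symbol comes from the RTLIB tables).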
2315 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2316 SPUTargetLowering &TLI) {
2317 MVT OpVT = Op.getValueType();
2318 SDValue Op0 = Op.getOperand(0);
2319 MVT Op0VT = Op0.getValueType();
2321 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2322 || Op0VT == MVT::i64) {
2323 // Convert i32, i64 to f64 via libcall:
2324 RTLIB::Libcall LC =
2325 (Op.getOpcode() == ISD::SINT_TO_FP)
2326 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2327 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2328 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2329 SDValue Dummy;
2330 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2333 return SDValue();
2336 //! Lower ISD::SETCC
2338 This handles MVT::f64 (double floating point) condition lowering
2340 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2341 const TargetLowering &TLI) {
2342 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2343 DebugLoc dl = Op.getDebugLoc();
2344 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2346 SDValue lhs = Op.getOperand(0);
2347 SDValue rhs = Op.getOperand(1);
2348 MVT lhsVT = lhs.getValueType();
2349 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2351 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2352 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2353 MVT IntVT(MVT::i64);
2355 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2356 // selected to a NOP:
2357 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2358 SDValue lhsHi32 =
2359 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2360 DAG.getNode(ISD::SRL, dl, IntVT,
2361 i64lhs, DAG.getConstant(32, MVT::i32)));
2362 SDValue lhsHi32abs =
2363 DAG.getNode(ISD::AND, dl, MVT::i32,
2364 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2365 SDValue lhsLo32 =
2366 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2368 // SETO and SETUO only use the lhs operand:
2369 if (CC->get() == ISD::SETO) {
2370 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2371 // SETUO
2372 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2373 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2374 DAG.getSetCC(dl, ccResultVT,
2375 lhs, DAG.getConstantFP(0.0, lhsVT),
2376 ISD::SETUO),
2377 DAG.getConstant(ccResultAllOnes, ccResultVT));
2378 } else if (CC->get() == ISD::SETUO) {
2379 // Evaluates to true if Op0 is [SQ]NaN
2380 return DAG.getNode(ISD::AND, dl, ccResultVT,
2381 DAG.getSetCC(dl, ccResultVT,
2382 lhsHi32abs,
2383 DAG.getConstant(0x7ff00000, MVT::i32),
2384 ISD::SETGE),
2385 DAG.getSetCC(dl, ccResultVT,
2386 lhsLo32,
2387 DAG.getConstant(0, MVT::i32),
2388 ISD::SETGT));
2391 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2392 SDValue rhsHi32 =
2393 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2394 DAG.getNode(ISD::SRL, dl, IntVT,
2395 i64rhs, DAG.getConstant(32, MVT::i32)));
2397 // If a value is negative, subtract from the sign magnitude constant:
2398 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2400 // Convert the sign-magnitude representation into 2's complement:
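// e.g. (illustrative): -1.0 has bits 0xbff0000000000000; subtracting from
// 0x8000000000000000 yields -0x3ff0000000000000 as a signed i64, the exact
// negation of +1.0's bits (0x3ff0000000000000), so plain signed integer
// compares order the converted doubles correctly.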
2401 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2402 lhsHi32, DAG.getConstant(31, MVT::i32));
2403 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2404 SDValue lhsSelect =
2405 DAG.getNode(ISD::SELECT, dl, IntVT,
2406 lhsSelectMask, lhsSignMag2TC, i64lhs);
2408 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2409 rhsHi32, DAG.getConstant(31, MVT::i32));
2410 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2411 SDValue rhsSelect =
2412 DAG.getNode(ISD::SELECT, dl, IntVT,
2413 rhsSelectMask, rhsSignMag2TC, i64rhs);
2415 unsigned compareOp;
2417 switch (CC->get()) {
2418 case ISD::SETOEQ:
2419 case ISD::SETUEQ:
2420 compareOp = ISD::SETEQ; break;
2421 case ISD::SETOGT:
2422 case ISD::SETUGT:
2423 compareOp = ISD::SETGT; break;
2424 case ISD::SETOGE:
2425 case ISD::SETUGE:
2426 compareOp = ISD::SETGE; break;
2427 case ISD::SETOLT:
2428 case ISD::SETULT:
2429 compareOp = ISD::SETLT; break;
2430 case ISD::SETOLE:
2431 case ISD::SETULE:
2432 compareOp = ISD::SETLE; break;
2433 case ISD::SETUNE:
2434 case ISD::SETONE:
2435 compareOp = ISD::SETNE; break;
2436 default:
2437 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2438 abort();
2439 break;
2442 SDValue result =
2443 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2444 (ISD::CondCode) compareOp);
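// Bit 3 of the ISD::CondCode encoding distinguishes the unordered
// predicates (SETUO, SETUEQ, ...) from their ordered counterparts,
// hence the 0x8 test below.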
2446 if ((CC->get() & 0x8) == 0) {
2447 // Ordered comparison:
2448 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2449 lhs, DAG.getConstantFP(0.0, MVT::f64),
2450 ISD::SETO);
2451 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2452 rhs, DAG.getConstantFP(0.0, MVT::f64),
2453 ISD::SETO);
2454 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2456 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2459 return result;
2462 //! Lower ISD::SELECT_CC
2464 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2465 SELB instruction.
2467 \note Need to revisit this in the future: if the code path through the true
2468 and false value computations is longer than the latency of a branch (6
2469 cycles), then it would be more advantageous to branch and insert a new basic
2470 block and branch on the condition. However, this code does not make that
2471 assumption, given the simplistic uses so far.
2474 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2475 const TargetLowering &TLI) {
2476 MVT VT = Op.getValueType();
2477 SDValue lhs = Op.getOperand(0);
2478 SDValue rhs = Op.getOperand(1);
2479 SDValue trueval = Op.getOperand(2);
2480 SDValue falseval = Op.getOperand(3);
2481 SDValue condition = Op.getOperand(4);
2482 DebugLoc dl = Op.getDebugLoc();
2484 // NOTE: SELB's arguments: $rA, $rB, $mask
2486 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2487 // where bits in $mask are 1. The compare result has 1s where the
2488 // condition was true and 0s where the condition was false. Hence, the
2489 // true and false values are passed to SELB in reversed order.
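// e.g. (illustrative): an i32 setcc that evaluates true produces
// 0xffffffff, so SELB(falseval, trueval, 0xffffffff) takes every bit from
// trueval, and SELB(falseval, trueval, 0) takes every bit from falseval.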
2491 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2492 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2493 // with another "cannot select select_cc" assert:
2495 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2496 TLI.getSetCCResultType(Op.getValueType()),
2497 lhs, rhs, condition);
2498 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2501 //! Custom lower ISD::TRUNCATE
2502 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2504 // Type to truncate to
2505 MVT VT = Op.getValueType();
2506 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2507 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2508 DebugLoc dl = Op.getDebugLoc();
2510 // Type to truncate from
2511 SDValue Op0 = Op.getOperand(0);
2512 MVT Op0VT = Op0.getValueType();
2514 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2515 // Create shuffle mask, least significant doubleword of quadword
2516 unsigned maskHigh = 0x08090a0b;
2517 unsigned maskLow = 0x0c0d0e0f;
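// SPU is big-endian, so bytes 8-15 of the i128 are its least-significant
// doubleword; the mask copies them into bytes 0-7 (the i64 preferred
// slot) and repeats them in bytes 8-15.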
2518 // Use a shuffle to perform the truncation
2519 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2520 DAG.getConstant(maskHigh, MVT::i32),
2521 DAG.getConstant(maskLow, MVT::i32),
2522 DAG.getConstant(maskHigh, MVT::i32),
2523 DAG.getConstant(maskLow, MVT::i32));
2525 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2526 Op0, Op0, shufMask);
2528 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2531 return SDValue(); // Leave the truncate unmolested
2534 //! Custom (target-specific) lowering entry point
2536 This is where LLVM's DAG selection process calls to do target-specific
2537 lowering of nodes.
2539 SDValue
2540 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2542 unsigned Opc = (unsigned) Op.getOpcode();
2543 MVT VT = Op.getValueType();
2545 switch (Opc) {
2546 default: {
2547 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2548 cerr << "Op.getOpcode() = " << Opc << "\n";
2549 cerr << "*Op.getNode():\n";
2550 Op.getNode()->dump();
2551 abort();
2553 case ISD::LOAD:
2554 case ISD::EXTLOAD:
2555 case ISD::SEXTLOAD:
2556 case ISD::ZEXTLOAD:
2557 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2558 case ISD::STORE:
2559 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2560 case ISD::ConstantPool:
2561 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2562 case ISD::GlobalAddress:
2563 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2564 case ISD::JumpTable:
2565 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2566 case ISD::ConstantFP:
2567 return LowerConstantFP(Op, DAG);
2568 case ISD::FORMAL_ARGUMENTS:
2569 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2570 case ISD::CALL:
2571 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2572 case ISD::RET:
2573 return LowerRET(Op, DAG, getTargetMachine());
2575 // i8, i64 math ops:
2576 case ISD::ADD:
2577 case ISD::SUB:
2578 case ISD::ROTR:
2579 case ISD::ROTL:
2580 case ISD::SRL:
2581 case ISD::SHL:
2582 case ISD::SRA: {
2583 if (VT == MVT::i8)
2584 return LowerI8Math(Op, DAG, Opc, *this);
2585 break;
2588 case ISD::FP_TO_SINT:
2589 case ISD::FP_TO_UINT:
2590 return LowerFP_TO_INT(Op, DAG, *this);
2592 case ISD::SINT_TO_FP:
2593 case ISD::UINT_TO_FP:
2594 return LowerINT_TO_FP(Op, DAG, *this);
2596 // Vector-related lowering.
2597 case ISD::BUILD_VECTOR:
2598 return LowerBUILD_VECTOR(Op, DAG);
2599 case ISD::SCALAR_TO_VECTOR:
2600 return LowerSCALAR_TO_VECTOR(Op, DAG);
2601 case ISD::VECTOR_SHUFFLE:
2602 return LowerVECTOR_SHUFFLE(Op, DAG);
2603 case ISD::EXTRACT_VECTOR_ELT:
2604 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2605 case ISD::INSERT_VECTOR_ELT:
2606 return LowerINSERT_VECTOR_ELT(Op, DAG);
2608 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2609 case ISD::AND:
2610 case ISD::OR:
2611 case ISD::XOR:
2612 return LowerByteImmed(Op, DAG);
2614 // Vector and i8 multiply:
2615 case ISD::MUL:
2616 if (VT == MVT::i8)
2617 return LowerI8Math(Op, DAG, Opc, *this);
break;
2619 case ISD::CTPOP:
2620 return LowerCTPOP(Op, DAG);
2622 case ISD::SELECT_CC:
2623 return LowerSELECT_CC(Op, DAG, *this);
2625 case ISD::SETCC:
2626 return LowerSETCC(Op, DAG, *this);
2628 case ISD::TRUNCATE:
2629 return LowerTRUNCATE(Op, DAG);
2632 return SDValue();
2635 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2636 SmallVectorImpl<SDValue>&Results,
2637 SelectionDAG &DAG)
2639 #if 0
2640 unsigned Opc = (unsigned) N->getOpcode();
2641 MVT OpVT = N->getValueType(0);
2643 switch (Opc) {
2644 default: {
2645 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2646 cerr << "Op.getOpcode() = " << Opc << "\n";
2647 cerr << "*Op.getNode():\n";
2648 N->dump();
2649 abort();
2650 /*NOTREACHED*/
2653 #endif
2655 /* Otherwise, return unchanged */
2658 //===----------------------------------------------------------------------===//
2659 // Target Optimization Hooks
2660 //===----------------------------------------------------------------------===//
2662 SDValue
2663 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2665 #if 0
2666 TargetMachine &TM = getTargetMachine();
2667 #endif
2668 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2669 SelectionDAG &DAG = DCI.DAG;
2670 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2671 MVT NodeVT = N->getValueType(0); // The node's value type
2672 MVT Op0VT = Op0.getValueType(); // The first operand's result
2673 SDValue Result; // Initially, empty result
2674 DebugLoc dl = N->getDebugLoc();
2676 switch (N->getOpcode()) {
2677 default: break;
2678 case ISD::ADD: {
2679 SDValue Op1 = N->getOperand(1);
2681 if (Op0.getOpcode() == SPUISD::IndirectAddr
2682 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2683 // Normalize the operands to reduce repeated code
2684 SDValue IndirectArg = Op0, AddArg = Op1;
2686 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2687 IndirectArg = Op1;
2688 AddArg = Op0;
2691 if (isa<ConstantSDNode>(AddArg)) {
2692 ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
2693 SDValue IndOp1 = IndirectArg.getOperand(1);
2695 if (CN0->isNullValue()) {
2696 // (add (SPUindirect <arg>, <arg>), 0) ->
2697 // (SPUindirect <arg>, <arg>)
2699 #if !defined(NDEBUG)
2700 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2701 cerr << "\n"
2702 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2703 << "With: (SPUindirect <arg>, <arg>)\n";
2705 #endif
2707 return IndirectArg;
2708 } else if (isa<ConstantSDNode>(IndOp1)) {
2709 // (add (SPUindirect <arg>, <const>), <const>) ->
2710 // (SPUindirect <arg>, <const + const>)
2711 ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
2712 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2713 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2715 #if !defined(NDEBUG)
2716 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2717 cerr << "\n"
2718 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2719 << "), " << CN0->getSExtValue() << ")\n"
2720 << "With: (SPUindirect <arg>, "
2721 << combinedConst << ")\n";
2723 #endif
2725 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2726 IndirectArg, combinedValue);
2730 break;
2732 case ISD::SIGN_EXTEND:
2733 case ISD::ZERO_EXTEND:
2734 case ISD::ANY_EXTEND: {
2735 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2736 // (any_extend (SPUextract_elt0 <arg>)) ->
2737 // (SPUextract_elt0 <arg>)
2738 // Types must match, however...
2739 #if !defined(NDEBUG)
2740 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2741 cerr << "\nReplace: ";
2742 N->dump(&DAG);
2743 cerr << "\nWith: ";
2744 Op0.getNode()->dump(&DAG);
2745 cerr << "\n";
2747 #endif
2749 return Op0;
2751 break;
2753 case SPUISD::IndirectAddr: {
2754 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2755 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2756 if (CN != 0 && CN->getZExtValue() == 0) {
2757 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2758 // (SPUaform <addr>, 0)
2760 DEBUG(cerr << "Replace: ");
2761 DEBUG(N->dump(&DAG));
2762 DEBUG(cerr << "\nWith: ");
2763 DEBUG(Op0.getNode()->dump(&DAG));
2764 DEBUG(cerr << "\n");
2766 return Op0;
2768 } else if (Op0.getOpcode() == ISD::ADD) {
2769 SDValue Op1 = N->getOperand(1);
2770 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2771 // (SPUindirect (add <arg>, <arg>), 0) ->
2772 // (SPUindirect <arg>, <arg>)
2773 if (CN1->isNullValue()) {
2775 #if !defined(NDEBUG)
2776 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2777 cerr << "\n"
2778 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2779 << "With: (SPUindirect <arg>, <arg>)\n";
2781 #endif
2783 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2784 Op0.getOperand(0), Op0.getOperand(1));
2788 break;
2790 case SPUISD::SHLQUAD_L_BITS:
2791 case SPUISD::SHLQUAD_L_BYTES:
2792 case SPUISD::VEC_SHL:
2793 case SPUISD::VEC_SRL:
2794 case SPUISD::VEC_SRA:
2795 case SPUISD::ROTBYTES_LEFT: {
2796 SDValue Op1 = N->getOperand(1);
2798 // Kill degenerate vector shifts:
2799 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2800 if (CN->isNullValue()) {
2801 Result = Op0;
2804 break;
2806 case SPUISD::PREFSLOT2VEC: {
2807 switch (Op0.getOpcode()) {
2808 default:
2809 break;
2810 case ISD::ANY_EXTEND:
2811 case ISD::ZERO_EXTEND:
2812 case ISD::SIGN_EXTEND: {
2813 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2814 // <arg>
2815 // but only if the SPUprefslot2vec and <arg> types match.
2816 SDValue Op00 = Op0.getOperand(0);
2817 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2818 SDValue Op000 = Op00.getOperand(0);
2819 if (Op000.getValueType() == NodeVT) {
2820 Result = Op000;
2823 break;
2825 case SPUISD::VEC2PREFSLOT: {
2826 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2827 // <arg>
2828 Result = Op0.getOperand(0);
2829 break;
2832 break;
2836 // Otherwise, return unchanged.
2837 #ifndef NDEBUG
2838 if (Result.getNode()) {
2839 DEBUG(cerr << "\nReplace.SPU: ");
2840 DEBUG(N->dump(&DAG));
2841 DEBUG(cerr << "\nWith: ");
2842 DEBUG(Result.getNode()->dump(&DAG));
2843 DEBUG(cerr << "\n");
2845 #endif
2847 return Result;
2850 //===----------------------------------------------------------------------===//
2851 // Inline Assembly Support
2852 //===----------------------------------------------------------------------===//
2854 /// getConstraintType - Given a constraint letter, return the type of
2855 /// constraint it is for this target.
2856 SPUTargetLowering::ConstraintType
2857 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2858 if (ConstraintLetter.size() == 1) {
2859 switch (ConstraintLetter[0]) {
2860 default: break;
2861 case 'b':
2862 case 'r':
2863 case 'f':
2864 case 'v':
2865 case 'y':
2866 return C_RegisterClass;
2869 return TargetLowering::getConstraintType(ConstraintLetter);
2872 std::pair<unsigned, const TargetRegisterClass*>
2873 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2874 MVT VT) const
2876 if (Constraint.size() == 1) {
2877 // GCC RS6000 constraint letters, which the SPU port reuses:
2878 switch (Constraint[0]) {
2879 case 'b': // R1-R31
2880 case 'r': // R0-R31
2881 if (VT == MVT::i64)
2882 return std::make_pair(0U, SPU::R64CRegisterClass);
2883 return std::make_pair(0U, SPU::R32CRegisterClass);
2884 case 'f':
2885 if (VT == MVT::f32)
2886 return std::make_pair(0U, SPU::R32FPRegisterClass);
2887 else if (VT == MVT::f64)
2888 return std::make_pair(0U, SPU::R64FPRegisterClass);
2889 break;
2890 case 'v':
2891 return std::make_pair(0U, SPU::GPRCRegisterClass);
2895 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2898 //! Compute used/known bits for a SPU operand
2899 void
2900 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2901 const APInt &Mask,
2902 APInt &KnownZero,
2903 APInt &KnownOne,
2904 const SelectionDAG &DAG,
2905 unsigned Depth) const {
2906 #if 0
2907 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2909 switch (Op.getOpcode()) {
2910 default:
2911 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2912 break;
2913 case CALL:
2914 case SHUFB:
2915 case SHUFFLE_MASK:
2916 case CNTB:
2917 case SPUISD::PREFSLOT2VEC:
2918 case SPUISD::LDRESULT:
2919 case SPUISD::VEC2PREFSLOT:
2920 case SPUISD::SHLQUAD_L_BITS:
2921 case SPUISD::SHLQUAD_L_BYTES:
2922 case SPUISD::VEC_SHL:
2923 case SPUISD::VEC_SRL:
2924 case SPUISD::VEC_SRA:
2925 case SPUISD::VEC_ROTL:
2926 case SPUISD::VEC_ROTR:
2927 case SPUISD::ROTBYTES_LEFT:
2928 case SPUISD::SELECT_MASK:
2929 case SPUISD::SELB:
2931 #endif
2934 unsigned
2935 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2936 unsigned Depth) const {
2937 switch (Op.getOpcode()) {
2938 default:
2939 return 1;
2941 case ISD::SETCC: {
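// A SETCC result on this target is all ones for true and all zeroes for
// false (see LowerSETCC above), so every bit of the (possibly promoted)
// result is a sign bit.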
2942 MVT VT = Op.getValueType();
2944 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2945 VT = MVT::i32;
2947 return VT.getSizeInBits();
2952 // LowerAsmOperandForConstraint
2953 void
2954 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2955 char ConstraintLetter,
2956 bool hasMemory,
2957 std::vector<SDValue> &Ops,
2958 SelectionDAG &DAG) const {
2959 // Default, for the time being, to the base class handler
2960 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2961 Ops, DAG);
2964 /// isLegalAddressImmediate - Return true if the integer value can be used
2965 /// as the offset of the target addressing mode.
2966 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2967 const Type *Ty) const {
2968 // SPU's local store addresses span 256K:
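// ((1 << 18) == 262144 == 256K, so e.g. an offset of 131072 is accepted
// while 262144 is not.)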
2969 return (V > -(1 << 18) && V < (1 << 18) - 1);
2972 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2973 return false;
2976 bool
2977 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2978 // The SPU target isn't yet aware of offsets.
2979 return false;