//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
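
  // Worked example (editorial note, derived from the table above): every
  // scalar lives in a fixed "preferred slot" of its 128-bit register. An
  // i32 or f32 occupies bytes 0-3 (prefslot_byte == 0), an i16 occupies
  // bytes 2-3 (prefslot_byte == 2), and an i8 occupies byte 3
  // (prefslot_byte == 3). The load/store lowering below rotates quadwords
  // so scalars land in, or are extracted from, exactly these positions.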

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}
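
// Hypothetical call site (editorial sketch, not in the original file): to
// expand an f64 divide into the "__fast_divdf3" libcall registered in the
// constructor below, one would write something like
//
//   SDValue Dummy;
//   SDValue Result = ExpandLibCall(RTLIB::DIV_F64, Op, DAG,
//                                  /*isSigned=*/true, Dummy, TLI);
//
// where Op is the ISD::FDIV node being replaced.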

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal and
  // matched during instruction selection:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,  MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND,  MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,    MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is listed as legal here and matched later, during instruction
    // selection.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
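
// Editorial gloss (not from the original source): in TargetLowering terms,
// "Legal" means the node is matched directly during instruction selection,
// "Custom" routes the node through this target's LowerOperation() hooks,
// "Expand" has the legalizer rewrite the node in terms of other operations
// (or a libcall), and "Promote" widens the type first, as with the i8/i16
// SINT_TO_FP cases above.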

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for an MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
 */
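// Worked example (editorial illustration): for an aligned i32 load whose
// address is base + 4 within its quadword, the code below computes
// rotamt = (4 & 0xf) - prefslot_byte(i32) = 4, so the quadword is rotated
// left by 4 bytes and the loaded word lands in the preferred slot
// (bytes 0-3).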
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
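// Worked example (editorial illustration): storing an i32 at offset 8 within
// an aligned quadword loads the existing 16 bytes, builds a SHUFFLE_MASK
// control word that routes the new word into byte positions 8-11 while
// passing the other twelve bytes through, applies SHUFB, and stores the
// merged quadword back.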
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned store: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||     // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
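
// Worked example (editorial illustration): Addr = 0x1234 has its low two
// bits clear and survives the 18-bit sign-extension check
// ((0x1234 << 14) >> 14 == 0x1234), so the function returns the word
// offset 0x1234 >> 2 = 0x48D; Addr = 0x1235 fails the alignment test and
// yields 0.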

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
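
// Worked example (editorial illustration): a v2i64 splat of
// 0x0000000100000001 has equal 32-bit halves, so the i64 path above reduces
// it to Value = 1, which fits in 18 bits and is returned as a target
// constant; a splat of 0x0000000200000001 has differing halves and is
// rejected.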

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
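
// Worked example (editorial illustration): for ValueType == MVT::i16 and
// Value = 0x4242, the upper byte ((short)0x4242 >> 8 == 0x42) matches the
// lower byte (0x4242 & 0xff == 0x42), so 0x42 is returned as the i8 splat
// value; Value = 0x1234 fails the comparison and falls through to
// SDValue().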

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where only the upper 16 bits of that value may be nonzero, and if
/// so, return those upper 16 bits as the constant (suitable for ILHU).
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT()) {
  default:
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
1569 SDValue
1570 SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1571 DebugLoc dl) {
1572 uint32_t upper = uint32_t(SplatVal >> 32);
1573 uint32_t lower = uint32_t(SplatVal);
1575 if (upper == lower) {
1576 // Magic constant that can be matched by IL, ILA, et. al.
1577 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1578 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1579 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1580 Val, Val, Val, Val));
1581 } else {
1582 bool upper_special, lower_special;
1584 // NOTE: This code creates common-case shuffle masks that can be easily
1585 // detected as common expressions. It is not attempting to create highly
1586 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1588 // Detect if the upper or lower half is a special shuffle mask pattern:
1589 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1590 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
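// shufb interprets certain control bytes specially: 0b10xxxxxx produces
// 0x00, 0b110xxxxx produces 0xFF, and 0b111xxxxx produces 0x80 in the
// corresponding result byte. That is why all-zeros, all-ones and 0x80000000
// halves can be synthesized from the mask alone, with no extra source.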
1592 // Both upper and lower are special, lower to a constant pool load:
1593 if (lower_special && upper_special) {
1594 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1595 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1596 SplatValCN, SplatValCN);
1597 }
1599 SDValue LO32;
1600 SDValue HI32;
1601 SmallVector<SDValue, 16> ShufBytes;
1602 SDValue Result;
1604 // Create lower vector if not a special pattern
1605 if (!lower_special) {
1606 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1607 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1608 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1609 LO32C, LO32C, LO32C, LO32C));
1610 }
1612 // Create upper vector if not a special pattern
1613 if (!upper_special) {
1614 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1615 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1616 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1617 HI32C, HI32C, HI32C, HI32C));
1618 }
1620 // If either upper or lower are special, then the two input operands are
1621 // the same (basically, one of them is a "don't care")
1622 if (lower_special)
1623 LO32 = HI32;
1624 if (upper_special)
1625 HI32 = LO32;
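// Build the 16-byte shufb control mask one word at a time. In the identity
// case, even words copy from the first shufb operand (HI32) and odd words
// from the second (LO32), hence the extra +16; special halves are encoded
// with the magic control bytes noted above instead of being copied.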
1627 for (int i = 0; i < 4; ++i) {
1628 uint64_t val = 0;
1629 for (int j = 0; j < 4; ++j) {
1630 SDValue V;
1631 bool process_upper, process_lower;
1632 val <<= 8;
1633 process_upper = (upper_special && (i & 1) == 0);
1634 process_lower = (lower_special && (i & 1) == 1);
1636 if (process_upper || process_lower) {
1637 if ((process_upper && upper == 0)
1638 || (process_lower && lower == 0))
1639 val |= 0x80;
1640 else if ((process_upper && upper == 0xffffffff)
1641 || (process_lower && lower == 0xffffffff))
1642 val |= 0xc0;
1643 else if ((process_upper && upper == 0x80000000)
1644 || (process_lower && lower == 0x80000000))
1645 val |= (j == 0 ? 0xe0 : 0x80);
1646 } else
1647 val |= i * 4 + j + ((i & 1) * 16);
1648 }
1650 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1651 }
1653 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1654 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1655 &ShufBytes[0], ShufBytes.size()));
1656 }
1657 }
1659 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1660 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1661 /// permutation vector, V3, is monotonically increasing with one "exception"
1662 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1663 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1664 /// In either case, the net result is going to eventually invoke SHUFB to
1665 /// permute/shuffle the bytes from V1 and V2.
1666 /// \note
1667 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1668 /// generate a control word for byte/halfword/word insertion. This takes care
1669 /// of a single element move from V2 into V1.
1670 /// \note
1671 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1672 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1673 SDValue V1 = Op.getOperand(0);
1674 SDValue V2 = Op.getOperand(1);
1675 SDValue PermMask = Op.getOperand(2);
1676 DebugLoc dl = Op.getDebugLoc();
1678 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1680 // If we have a single element being moved from V1 to V2, this can be handled
1681 // using the C*[DX] compute mask instructions, but the vector elements have
1682 // to be monotonically increasing with one exception element.
1683 MVT VecVT = V1.getValueType();
1684 MVT EltVT = VecVT.getVectorElementType();
1685 unsigned EltsFromV2 = 0;
1686 unsigned V2Elt = 0;
1687 unsigned V2EltIdx0 = 0;
1688 unsigned CurrElt = 0;
1689 unsigned MaxElts = VecVT.getVectorNumElements();
1690 unsigned PrevElt = 0;
1691 unsigned V0Elt = 0;
1692 bool monotonic = true;
1693 bool rotate = true;
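// Two cheap strategies are tried before falling back to a full byte
// shuffle: 'monotonic' stays true while the mask is the identity with a
// single element taken from V2 (lowered via a C*D insertion mask), and
// 'rotate' stays true while the mask is a pure rotation of V1's elements
// (lowered via ROTBYTES_LEFT).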
1695 if (EltVT == MVT::i8) {
1696 V2EltIdx0 = 16;
1697 } else if (EltVT == MVT::i16) {
1698 V2EltIdx0 = 8;
1699 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1700 V2EltIdx0 = 4;
1701 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1702 V2EltIdx0 = 2;
1703 } else
1704 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1706 for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
1707 if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
1708 unsigned SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1710 if (monotonic) {
1711 if (SrcElt >= V2EltIdx0) {
1712 if (1 >= (++EltsFromV2)) {
1713 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1714 }
1715 } else if (CurrElt != SrcElt) {
1716 monotonic = false;
1717 }
1719 ++CurrElt;
1720 }
1722 if (rotate) {
1723 if (PrevElt > 0 && SrcElt < MaxElts) {
1724 if ((PrevElt == SrcElt - 1)
1725 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1726 PrevElt = SrcElt;
1727 if (SrcElt == 0)
1728 V0Elt = i;
1729 } else {
1730 rotate = false;
1731 }
1732 } else if (PrevElt == 0) {
1733 // First time through, need to keep track of previous element
1734 PrevElt = SrcElt;
1735 } else {
1736 // This isn't a rotation; the mask takes elements from the second vector
1737 rotate = false;
1738 }
1739 }
1740 }
1741 }
1743 if (EltsFromV2 == 1 && monotonic) {
1744 // Compute mask and shuffle
1745 MachineFunction &MF = DAG.getMachineFunction();
1746 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1747 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1748 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1749 // Initialize temporary register to 0
1750 SDValue InitTempReg =
1751 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1752 // Copy register's contents as index in SHUFFLE_MASK:
1753 SDValue ShufMaskOp =
1754 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1755 DAG.getTargetConstant(V2Elt, MVT::i32),
1756 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1757 // Use shuffle mask in SHUFB synthetic instruction:
1758 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1759 ShufMaskOp);
1760 } else if (rotate) {
1761 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1763 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1764 V1, DAG.getConstant(rotamt, MVT::i16));
1765 } else {
1766 // Convert the SHUFFLE_VECTOR mask's input element units to the
1767 // actual bytes.
1768 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1770 SmallVector<SDValue, 16> ResultMask;
1771 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1772 unsigned SrcElt;
1773 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1774 SrcElt = 0;
1775 else
1776 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();
1778 for (unsigned j = 0; j < BytesPerElement; ++j) {
1779 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1780 MVT::i8));
1781 }
1782 }
1784 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1785 &ResultMask[0], ResultMask.size());
1786 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1787 }
1788 }
1790 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1791 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1792 DebugLoc dl = Op.getDebugLoc();
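// On SPU every scalar already lives in a 128-bit register; the "preferred
// slot" is the fixed byte range of the quadword holding the scalar value.
// A non-constant scalar can therefore be re-tagged as a vector with
// PREFSLOT2VEC instead of going through memory.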
1794 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1795 // For a constant, build the appropriate constant vector, which will
1796 // eventually simplify to a vector register load.
1798 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1799 SmallVector<SDValue, 16> ConstVecValues;
1800 MVT VT;
1801 size_t n_copies;
1803 // Create a constant vector:
1804 switch (Op.getValueType().getSimpleVT()) {
1805 default: assert(0 && "Unexpected constant value type in "
1806 "LowerSCALAR_TO_VECTOR");
1807 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1808 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1809 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1810 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1811 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1812 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1813 }
1815 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1816 for (size_t j = 0; j < n_copies; ++j)
1817 ConstVecValues.push_back(CValue);
1819 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1820 &ConstVecValues[0], ConstVecValues.size());
1821 } else {
1822 // Otherwise, copy the value from one register to another:
1823 switch (Op0.getValueType().getSimpleVT()) {
1824 default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1825 case MVT::i8:
1826 case MVT::i16:
1827 case MVT::i32:
1828 case MVT::i64:
1829 case MVT::f32:
1830 case MVT::f64:
1831 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1832 }
1833 }
1835 return SDValue();
1836 }
1838 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1839 MVT VT = Op.getValueType();
1840 SDValue N = Op.getOperand(0);
1841 SDValue Elt = Op.getOperand(1);
1842 DebugLoc dl = Op.getDebugLoc();
1843 SDValue retval;
1845 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1846 // Constant argument:
1847 int EltNo = (int) C->getZExtValue();
1849 // sanity checks:
1850 if (VT == MVT::i8 && EltNo >= 16)
1851 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1852 else if (VT == MVT::i16 && EltNo >= 8)
1853 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1854 else if (VT == MVT::i32 && EltNo >= 4)
1855 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1856 else if (VT == MVT::i64 && EltNo >= 2)
1857 assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1859 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1860 // i32 and i64: Element 0 is the preferred slot
1861 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1862 }
1864 // Need to generate shuffle mask and extract:
1865 int prefslot_begin = -1, prefslot_end = -1;
1866 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1868 switch (VT.getSimpleVT()) {
1869 default:
1870 assert(false && "Invalid value type!");
1871 case MVT::i8: {
1872 prefslot_begin = prefslot_end = 3;
1873 break;
1874 }
1875 case MVT::i16: {
1876 prefslot_begin = 2; prefslot_end = 3;
1877 break;
1878 }
1879 case MVT::i32:
1880 case MVT::f32: {
1881 prefslot_begin = 0; prefslot_end = 3;
1882 break;
1883 }
1884 case MVT::i64:
1885 case MVT::f64: {
1886 prefslot_begin = 0; prefslot_end = 7;
1887 break;
1888 }
1889 }
1891 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1892 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1894 unsigned int ShufBytes[16];
1895 for (int i = 0; i < 16; ++i) {
1896 // zero-fill the upper part of the preferred slot, don't care about the
1897 // other slots:
1898 unsigned int mask_val;
1899 if (i <= prefslot_end) {
1900 mask_val =
1901 ((i < prefslot_begin)
1902 ? 0x80
1903 : elt_byte + (i - prefslot_begin));
1905 ShufBytes[i] = mask_val;
1906 } else
1907 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1908 }
1910 SDValue ShufMask[4];
1911 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1912 unsigned bidx = i * 4;
1913 unsigned int bits = ((ShufBytes[bidx] << 24) |
1914 (ShufBytes[bidx+1] << 16) |
1915 (ShufBytes[bidx+2] << 8) |
1916 ShufBytes[bidx+3]);
1917 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1918 }
1920 SDValue ShufMaskVec =
1921 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1922 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1924 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1925 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1926 N, N, ShufMaskVec));
1927 } else {
1928 // Variable index: Rotate the requested element into slot 0, then replicate
1929 // slot 0 across the vector
1930 MVT VecVT = N.getValueType();
1931 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
1932 cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
1933 abort();
1934 }
1936 // Make life easier by making sure the index is zero-extended to i32
1937 if (Elt.getValueType() != MVT::i32)
1938 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
1940 // Scale the index to a bit/byte shift quantity
1941 APInt scaleFactor =
1942 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
1943 unsigned scaleShift = scaleFactor.logBase2();
1944 SDValue vecShift;
1946 if (scaleShift > 0) {
1947 // Scale the shift factor:
1948 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
1949 DAG.getConstant(scaleShift, MVT::i32));
1950 }
1952 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
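// Shifting the quadword left by the scaled byte count leaves the requested
// element's bytes starting at byte 0 of the register.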
1954 // Replicate the bytes starting at byte 0 across the entire vector (for
1955 // consistency with the notion of a unified register set)
1956 SDValue replicate;
1958 switch (VT.getSimpleVT()) {
1959 default:
1960 cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
1961 abort();
1962 /*NOTREACHED*/
1963 case MVT::i8: {
1964 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
1965 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1966 factor, factor, factor, factor);
1967 break;
1968 }
1969 case MVT::i16: {
1970 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
1971 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1972 factor, factor, factor, factor);
1973 break;
1974 }
1975 case MVT::i32:
1976 case MVT::f32: {
1977 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
1978 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1979 factor, factor, factor, factor);
1980 break;
1981 }
1982 case MVT::i64:
1983 case MVT::f64: {
1984 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
1985 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
1986 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1987 loFactor, hiFactor, loFactor, hiFactor);
1988 break;
1989 }
1990 }
1992 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1993 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
1994 vecShift, vecShift, replicate));
1995 }
1997 return retval;
1998 }
2000 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2001 SDValue VecOp = Op.getOperand(0);
2002 SDValue ValOp = Op.getOperand(1);
2003 SDValue IdxOp = Op.getOperand(2);
2004 DebugLoc dl = Op.getDebugLoc();
2005 MVT VT = Op.getValueType();
2007 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2008 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2010 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2011 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2012 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2013 DAG.getRegister(SPU::R1, PtrVT),
2014 DAG.getConstant(CN->getSExtValue(), PtrVT));
2015 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
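// SHUFFLE_MASK selects to a C*D-form instruction (cbd/chd/cwd/cdd), whose
// insertion control word is derived from the low four bits of its address
// operand; $sp merely provides a known 16-byte-aligned base for that sum.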
2017 SDValue result =
2018 DAG.getNode(SPUISD::SHUFB, dl, VT,
2019 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2020 VecOp,
2021 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2023 return result;
2024 }
2026 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2027 const TargetLowering &TLI)
2028 {
2029 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2030 DebugLoc dl = Op.getDebugLoc();
2031 MVT ShiftVT = TLI.getShiftAmountTy();
2033 assert(Op.getValueType() == MVT::i8);
2034 switch (Opc) {
2035 default:
2036 assert(0 && "Unhandled i8 math operator");
2037 /*NOTREACHED*/
2038 break;
2039 case ISD::ADD: {
2040 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2041 // the result:
2042 SDValue N1 = Op.getOperand(1);
2043 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2044 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2045 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2046 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2047 }
2050 case ISD::SUB: {
2051 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2052 // the result:
2053 SDValue N1 = Op.getOperand(1);
2054 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2055 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2056 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2057 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2058 }
2059 case ISD::ROTR:
2060 case ISD::ROTL: {
2061 SDValue N1 = Op.getOperand(1);
2062 MVT N1VT = N1.getValueType();
2064 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2065 if (!N1VT.bitsEq(ShiftVT)) {
2066 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2067 ? ISD::ZERO_EXTEND
2068 : ISD::TRUNCATE;
2069 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2070 }
2072 // Replicate lower 8-bits into upper 8:
2073 SDValue ExpandArg =
2074 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2075 DAG.getNode(ISD::SHL, dl, MVT::i16,
2076 N0, DAG.getConstant(8, MVT::i32)));
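// Because the doubled value is periodic in 8 bits, a 16-bit rotate of it
// leaves the correct 8-bit rotate result in the low byte, which the
// truncate below extracts.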
2078 // Truncate back down to i8
2079 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2080 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2081 }
2082 case ISD::SRL:
2083 case ISD::SHL: {
2084 SDValue N1 = Op.getOperand(1);
2085 MVT N1VT = N1.getValueType();
2087 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2088 if (!N1VT.bitsEq(ShiftVT)) {
2089 unsigned N1Opc = ISD::ZERO_EXTEND;
2091 if (N1.getValueType().bitsGT(ShiftVT))
2092 N1Opc = ISD::TRUNCATE;
2094 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2095 }
2097 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2098 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2099 }
2100 case ISD::SRA: {
2101 SDValue N1 = Op.getOperand(1);
2102 MVT N1VT = N1.getValueType();
2104 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2105 if (!N1VT.bitsEq(ShiftVT)) {
2106 unsigned N1Opc = ISD::SIGN_EXTEND;
2108 if (N1VT.bitsGT(ShiftVT))
2109 N1Opc = ISD::TRUNCATE;
2110 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2111 }
2113 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2114 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2115 }
2116 case ISD::MUL: {
2117 SDValue N1 = Op.getOperand(1);
2119 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2120 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2121 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2122 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2123 break;
2124 }
2125 }
2127 return SDValue();
2128 }
2130 //! Lower byte immediate operations for v16i8 vectors:
2131 static SDValue
2132 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2133 SDValue ConstVec;
2134 SDValue Arg;
2135 MVT VT = Op.getValueType();
2136 DebugLoc dl = Op.getDebugLoc();
2138 ConstVec = Op.getOperand(0);
2139 Arg = Op.getOperand(1);
2140 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2141 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2142 ConstVec = ConstVec.getOperand(0);
2143 } else {
2144 ConstVec = Op.getOperand(1);
2145 Arg = Op.getOperand(0);
2146 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2147 ConstVec = ConstVec.getOperand(0);
2148 }
2149 }
2150 }
2152 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2153 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2154 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2156 APInt APSplatBits, APSplatUndef;
2157 unsigned SplatBitSize;
2158 bool HasAnyUndefs;
2159 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2161 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2162 HasAnyUndefs, minSplatBits)
2163 && minSplatBits <= SplatBitSize) {
2164 uint64_t SplatBits = APSplatBits.getZExtValue();
2165 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2167 SmallVector<SDValue, 16> tcVec;
2168 tcVec.assign(16, tc);
2169 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2170 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2171 }
2172 }
2174 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2175 // lowered. Return the operation, rather than a null SDValue.
2176 return Op;
2177 }
2179 //! Custom lowering for CTPOP (count population)
2180 /*!
2181 Custom lowering code that counts the number of ones in the input
2182 operand. SPU has such an instruction, but it counts the number of
2183 ones per byte, which then have to be accumulated.
2184 */
2185 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2186 MVT VT = Op.getValueType();
2187 MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2188 DebugLoc dl = Op.getDebugLoc();
2190 switch (VT.getSimpleVT()) {
2191 default:
2192 assert(false && "Invalid value type!");
2193 case MVT::i8: {
2194 SDValue N = Op.getOperand(0);
2195 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2197 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2198 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2200 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2201 }
2203 case MVT::i16: {
2204 MachineFunction &MF = DAG.getMachineFunction();
2205 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2207 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2209 SDValue N = Op.getOperand(0);
2210 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2211 SDValue Mask0 = DAG.getConstant(0x1f, MVT::i16); // the count can reach 16, which needs 5 bits
2212 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2214 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2215 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2217 // CNTB_result becomes the chain to which all of the virtual registers
2218 // CNTB_reg, SUM1_reg become associated:
2219 SDValue CNTB_result =
2220 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2222 SDValue CNTB_rescopy =
2223 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2225 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2227 return DAG.getNode(ISD::AND, dl, MVT::i16,
2228 DAG.getNode(ISD::ADD, dl, MVT::i16,
2229 DAG.getNode(ISD::SRL, dl, MVT::i16,
2230 Tmp1, Shift1),
2231 Tmp1),
2232 Mask0);
2233 }
2235 case MVT::i32: {
2236 MachineFunction &MF = DAG.getMachineFunction();
2237 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2239 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2240 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2242 SDValue N = Op.getOperand(0);
2243 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2244 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2245 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2246 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
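// CNTB leaves four per-byte population counts packed into the i32 lane.
// Two shift-and-add rounds (by 16, then by 8) fold them into the low byte,
// and the final AND with 0xff strips the partial sums that accumulate in
// the upper bytes (the true count is at most 32, which fits).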
2248 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2249 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2251 // CNTB_result becomes the chain to which all of the virtual registers
2252 // CNTB_reg, SUM1_reg become associated:
2253 SDValue CNTB_result =
2254 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2256 SDValue CNTB_rescopy =
2257 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2259 SDValue Comp1 =
2260 DAG.getNode(ISD::SRL, dl, MVT::i32,
2261 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2262 Shift1);
2264 SDValue Sum1 =
2265 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2266 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2268 SDValue Sum1_rescopy =
2269 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2271 SDValue Comp2 =
2272 DAG.getNode(ISD::SRL, dl, MVT::i32,
2273 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2274 Shift2);
2275 SDValue Sum2 =
2276 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2277 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2279 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2280 }
2282 case MVT::i64:
2283 break;
2284 }
2286 return SDValue();
2287 }
2289 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2290 /*!
2291 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2292 All conversions to i64 are expanded to a libcall.
2293 */
2294 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2295 SPUTargetLowering &TLI) {
2296 MVT OpVT = Op.getValueType();
2297 SDValue Op0 = Op.getOperand(0);
2298 MVT Op0VT = Op0.getValueType();
2300 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2301 || OpVT == MVT::i64) {
2302 // Convert f32 / f64 to i32 / i64 via libcall.
2303 RTLIB::Libcall LC =
2304 (Op.getOpcode() == ISD::FP_TO_SINT)
2305 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2306 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2307 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2308 SDValue Dummy;
2309 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2310 }
2312 return SDValue();
2313 }
2315 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2316 /*!
2317 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2318 All conversions from i64 are expanded to a libcall.
2319 */
2320 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2321 SPUTargetLowering &TLI) {
2322 MVT OpVT = Op.getValueType();
2323 SDValue Op0 = Op.getOperand(0);
2324 MVT Op0VT = Op0.getValueType();
2326 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2327 || Op0VT == MVT::i64) {
2328 // Convert i32, i64 to f64 via libcall:
2329 RTLIB::Libcall LC =
2330 (Op.getOpcode() == ISD::SINT_TO_FP)
2331 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2332 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2333 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2334 SDValue Dummy;
2335 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2336 }
2338 return SDValue();
2339 }
2341 //! Lower ISD::SETCC
2342 /*!
2343 This handles MVT::f64 (double floating point) condition lowering.
2344 */
2345 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2346 const TargetLowering &TLI) {
2347 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2348 DebugLoc dl = Op.getDebugLoc();
2349 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2351 SDValue lhs = Op.getOperand(0);
2352 SDValue rhs = Op.getOperand(1);
2353 MVT lhsVT = lhs.getValueType();
2354 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2356 MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2357 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2358 MVT IntVT(MVT::i64);
2360 // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2361 // selected to a NOP:
2362 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2363 SDValue lhsHi32 =
2364 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2365 DAG.getNode(ISD::SRL, dl, IntVT,
2366 i64lhs, DAG.getConstant(32, MVT::i32)));
2367 SDValue lhsHi32abs =
2368 DAG.getNode(ISD::AND, dl, MVT::i32,
2369 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2370 SDValue lhsLo32 =
2371 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2373 // SETO and SETUO only use the lhs operand:
2374 if (CC->get() == ISD::SETO) {
2375 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2376 // SETUO
2377 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2378 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2379 DAG.getSetCC(dl, ccResultVT,
2380 lhs, DAG.getConstantFP(0.0, lhsVT),
2381 ISD::SETUO),
2382 DAG.getConstant(ccResultAllOnes, ccResultVT));
2383 } else if (CC->get() == ISD::SETUO) {
2384 // Evaluates to true if Op0 is [SQ]NaN
2385 return DAG.getNode(ISD::AND, dl, ccResultVT,
2386 DAG.getSetCC(dl, ccResultVT,
2387 lhsHi32abs,
2388 DAG.getConstant(0x7ff00000, MVT::i32),
2389 ISD::SETGE),
2390 DAG.getSetCC(dl, ccResultVT,
2391 lhsLo32,
2392 DAG.getConstant(0, MVT::i32),
2393 ISD::SETGT));
2394 }
2396 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2397 SDValue rhsHi32 =
2398 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2399 DAG.getNode(ISD::SRL, dl, IntVT,
2400 i64rhs, DAG.getConstant(32, MVT::i32)));
2402 // If a value is negative, subtract from the sign magnitude constant:
2403 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2405 // Convert the sign-magnitude representation into 2's complement:
2406 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2407 lhsHi32, DAG.getConstant(31, MVT::i32));
2408 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2409 SDValue lhsSelect =
2410 DAG.getNode(ISD::SELECT, dl, IntVT,
2411 lhsSelectMask, lhsSignMag2TC, i64lhs);
2413 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2414 rhsHi32, DAG.getConstant(31, MVT::i32));
2415 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2416 SDValue rhsSelect =
2417 DAG.getNode(ISD::SELECT, dl, IntVT,
2418 rhsSelectMask, rhsSignMag2TC, i64rhs);
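// Doubles are sign-magnitude; for negative operands, subtracting the bit
// pattern from 0x8000000000000000 produces its two's-complement ordering
// equivalent. After these selects, plain signed integer comparisons give
// the correct IEEE ordering for all non-NaN values (NaNs are handled below).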
2420 unsigned compareOp;
2422 switch (CC->get()) {
2423 case ISD::SETOEQ:
2424 case ISD::SETUEQ:
2425 compareOp = ISD::SETEQ; break;
2426 case ISD::SETOGT:
2427 case ISD::SETUGT:
2428 compareOp = ISD::SETGT; break;
2429 case ISD::SETOGE:
2430 case ISD::SETUGE:
2431 compareOp = ISD::SETGE; break;
2432 case ISD::SETOLT:
2433 case ISD::SETULT:
2434 compareOp = ISD::SETLT; break;
2435 case ISD::SETOLE:
2436 case ISD::SETULE:
2437 compareOp = ISD::SETLE; break;
2438 case ISD::SETUNE:
2439 case ISD::SETONE:
2440 compareOp = ISD::SETNE; break;
2441 default:
2442 cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
2443 abort();
2444 break;
2445 }
2447 SDValue result =
2448 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2449 (ISD::CondCode) compareOp);
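// In ISD::CondCode the 0x8 bit marks the unordered comparison variants;
// when it is clear the comparison is ordered, so additionally require that
// neither operand is a NaN.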
2451 if ((CC->get() & 0x8) == 0) {
2452 // Ordered comparison:
2453 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2454 lhs, DAG.getConstantFP(0.0, MVT::f64),
2455 ISD::SETO);
2456 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2457 rhs, DAG.getConstantFP(0.0, MVT::f64),
2458 ISD::SETO);
2459 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2461 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2462 }
2464 return result;
2465 }
2467 //! Lower ISD::SELECT_CC
2468 /*!
2469 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2470 SELB instruction.
2472 \note Need to revisit this in the future: if the code path through the true
2473 and false value computations is longer than the latency of a branch (6
2474 cycles), then it would be more advantageous to branch and insert a new basic
2475 block and branch on the condition. However, this code does not make that
2476 assumption, given the simplistic uses so far.
2477 */
2479 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2480 const TargetLowering &TLI) {
2481 MVT VT = Op.getValueType();
2482 SDValue lhs = Op.getOperand(0);
2483 SDValue rhs = Op.getOperand(1);
2484 SDValue trueval = Op.getOperand(2);
2485 SDValue falseval = Op.getOperand(3);
2486 SDValue condition = Op.getOperand(4);
2487 DebugLoc dl = Op.getDebugLoc();
2489 // NOTE: SELB's arguments: $rA, $rB, $mask
2491 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2492 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2493 // condition was true and 0s where the condition was false. Hence, the
2494 // arguments to SELB get reversed.
2496 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2497 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2498 // with another "cannot select select_cc" assert:
2500 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2501 TLI.getSetCCResultType(Op.getValueType()),
2502 lhs, rhs, condition);
2503 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2504 }
2506 //! Custom lower ISD::TRUNCATE
2507 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2508 {
2509 // Type to truncate to
2510 MVT VT = Op.getValueType();
2511 MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2512 MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2513 DebugLoc dl = Op.getDebugLoc();
2515 // Type to truncate from
2516 SDValue Op0 = Op.getOperand(0);
2517 MVT Op0VT = Op0.getValueType();
2519 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2520 // Create shuffle mask, least significant doubleword of quadword
2521 unsigned maskHigh = 0x08090a0b;
2522 unsigned maskLow = 0x0c0d0e0f;
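// In the SPU's big-endian register layout, bytes 8..15 of the quadword hold
// the least-significant doubleword of the i128. The mask copies those bytes
// into the i64 preferred slot (bytes 0..7), and VEC2PREFSLOT below then
// extracts the truncated scalar.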
2523 // Use a shuffle to perform the truncation
2524 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2525 DAG.getConstant(maskHigh, MVT::i32),
2526 DAG.getConstant(maskLow, MVT::i32),
2527 DAG.getConstant(maskHigh, MVT::i32),
2528 DAG.getConstant(maskLow, MVT::i32));
2530 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2531 Op0, Op0, shufMask);
2533 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2534 }
2536 return SDValue(); // Leave the truncate unmolested
2537 }
2539 //! Custom (target-specific) lowering entry point
2540 /*!
2541 This is where LLVM's DAG selection process calls to do target-specific
2542 lowering of nodes.
2543 */
2544 SDValue
2545 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2546 {
2547 unsigned Opc = (unsigned) Op.getOpcode();
2548 MVT VT = Op.getValueType();
2550 switch (Opc) {
2551 default: {
2552 cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2553 cerr << "Op.getOpcode() = " << Opc << "\n";
2554 cerr << "*Op.getNode():\n";
2555 Op.getNode()->dump();
2556 abort();
2557 }
2558 case ISD::LOAD:
2559 case ISD::EXTLOAD:
2560 case ISD::SEXTLOAD:
2561 case ISD::ZEXTLOAD:
2562 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2563 case ISD::STORE:
2564 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2565 case ISD::ConstantPool:
2566 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2567 case ISD::GlobalAddress:
2568 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2569 case ISD::JumpTable:
2570 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2571 case ISD::ConstantFP:
2572 return LowerConstantFP(Op, DAG);
2573 case ISD::FORMAL_ARGUMENTS:
2574 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2575 case ISD::CALL:
2576 return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2577 case ISD::RET:
2578 return LowerRET(Op, DAG, getTargetMachine());
2580 // i8, i64 math ops:
2581 case ISD::ADD:
2582 case ISD::SUB:
2583 case ISD::ROTR:
2584 case ISD::ROTL:
2585 case ISD::SRL:
2586 case ISD::SHL:
2587 case ISD::SRA: {
2588 if (VT == MVT::i8)
2589 return LowerI8Math(Op, DAG, Opc, *this);
2590 break;
2591 }
2593 case ISD::FP_TO_SINT:
2594 case ISD::FP_TO_UINT:
2595 return LowerFP_TO_INT(Op, DAG, *this);
2597 case ISD::SINT_TO_FP:
2598 case ISD::UINT_TO_FP:
2599 return LowerINT_TO_FP(Op, DAG, *this);
2601 // Vector-related lowering.
2602 case ISD::BUILD_VECTOR:
2603 return LowerBUILD_VECTOR(Op, DAG);
2604 case ISD::SCALAR_TO_VECTOR:
2605 return LowerSCALAR_TO_VECTOR(Op, DAG);
2606 case ISD::VECTOR_SHUFFLE:
2607 return LowerVECTOR_SHUFFLE(Op, DAG);
2608 case ISD::EXTRACT_VECTOR_ELT:
2609 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2610 case ISD::INSERT_VECTOR_ELT:
2611 return LowerINSERT_VECTOR_ELT(Op, DAG);
2613 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2614 case ISD::AND:
2615 case ISD::OR:
2616 case ISD::XOR:
2617 return LowerByteImmed(Op, DAG);
2619 // Vector and i8 multiply:
2620 case ISD::MUL:
2621 if (VT == MVT::i8)
2622 return LowerI8Math(Op, DAG, Opc, *this);
2624 case ISD::CTPOP:
2625 return LowerCTPOP(Op, DAG);
2627 case ISD::SELECT_CC:
2628 return LowerSELECT_CC(Op, DAG, *this);
2630 case ISD::SETCC:
2631 return LowerSETCC(Op, DAG, *this);
2633 case ISD::TRUNCATE:
2634 return LowerTRUNCATE(Op, DAG);
2635 }
2637 return SDValue();
2638 }
2640 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2641 SmallVectorImpl<SDValue>&Results,
2642 SelectionDAG &DAG)
2643 {
2644 #if 0
2645 unsigned Opc = (unsigned) N->getOpcode();
2646 MVT OpVT = N->getValueType(0);
2648 switch (Opc) {
2649 default: {
2650 cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2651 cerr << "Op.getOpcode() = " << Opc << "\n";
2652 cerr << "*Op.getNode():\n";
2653 N->dump();
2654 abort();
2655 /*NOTREACHED*/
2656 }
2657 }
2658 #endif
2660 /* Otherwise, return unchanged */
2661 }
2663 //===----------------------------------------------------------------------===//
2664 // Target Optimization Hooks
2665 //===----------------------------------------------------------------------===//
2667 SDValue
2668 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2669 {
2670 #if 0
2671 TargetMachine &TM = getTargetMachine();
2672 #endif
2673 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2674 SelectionDAG &DAG = DCI.DAG;
2675 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2676 MVT NodeVT = N->getValueType(0); // The node's value type
2677 MVT Op0VT = Op0.getValueType(); // The first operand's result
2678 SDValue Result; // Initially, empty result
2679 DebugLoc dl = N->getDebugLoc();
2681 switch (N->getOpcode()) {
2682 default: break;
2683 case ISD::ADD: {
2684 SDValue Op1 = N->getOperand(1);
2686 if (Op0.getOpcode() == SPUISD::IndirectAddr
2687 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2688 // Normalize the operands to reduce repeated code
2689 SDValue IndirectArg = Op0, AddArg = Op1;
2691 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2692 IndirectArg = Op1;
2693 AddArg = Op0;
2694 }
2696 if (isa<ConstantSDNode>(AddArg)) {
2697 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2698 SDValue IndOp1 = IndirectArg.getOperand(1);
2700 if (CN0->isNullValue()) {
2701 // (add (SPUindirect <arg>, <arg>), 0) ->
2702 // (SPUindirect <arg>, <arg>)
2704 #if !defined(NDEBUG)
2705 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2706 cerr << "\n"
2707 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2708 << "With: (SPUindirect <arg>, <arg>)\n";
2710 #endif
2712 return IndirectArg;
2713 } else if (isa<ConstantSDNode>(IndOp1)) {
2714 // (add (SPUindirect <arg>, <const>), <const>) ->
2715 // (SPUindirect <arg>, <const + const>)
2716 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2717 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2718 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2720 #if !defined(NDEBUG)
2721 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2722 cerr << "\n"
2723 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2724 << "), " << CN0->getSExtValue() << ")\n"
2725 << "With: (SPUindirect <arg>, "
2726 << combinedConst << ")\n";
2728 #endif
2730 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2731 IndirectArg, combinedValue);
2732 }
2733 }
2734 }
2735 break;
2736 }
2737 case ISD::SIGN_EXTEND:
2738 case ISD::ZERO_EXTEND:
2739 case ISD::ANY_EXTEND: {
2740 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2741 // (any_extend (SPUextract_elt0 <arg>)) ->
2742 // (SPUextract_elt0 <arg>)
2743 // Types must match, however...
2744 #if !defined(NDEBUG)
2745 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2746 cerr << "\nReplace: ";
2747 N->dump(&DAG);
2748 cerr << "\nWith: ";
2749 Op0.getNode()->dump(&DAG);
2750 cerr << "\n";
2751 }
2752 #endif
2754 return Op0;
2755 }
2756 break;
2757 }
2758 case SPUISD::IndirectAddr: {
2759 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2760 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2761 if (CN != 0 && CN->getZExtValue() == 0) {
2762 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2763 // (SPUaform <addr>, 0)
2765 DEBUG(cerr << "Replace: ");
2766 DEBUG(N->dump(&DAG));
2767 DEBUG(cerr << "\nWith: ");
2768 DEBUG(Op0.getNode()->dump(&DAG));
2769 DEBUG(cerr << "\n");
2771 return Op0;
2772 }
2773 } else if (Op0.getOpcode() == ISD::ADD) {
2774 SDValue Op1 = N->getOperand(1);
2775 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2776 // (SPUindirect (add <arg>, <arg>), 0) ->
2777 // (SPUindirect <arg>, <arg>)
2778 if (CN1->isNullValue()) {
2780 #if !defined(NDEBUG)
2781 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2782 cerr << "\n"
2783 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2784 << "With: (SPUindirect <arg>, <arg>)\n";
2786 #endif
2788 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2789 Op0.getOperand(0), Op0.getOperand(1));
2790 }
2791 }
2792 }
2793 break;
2794 }
2795 case SPUISD::SHLQUAD_L_BITS:
2796 case SPUISD::SHLQUAD_L_BYTES:
2797 case SPUISD::VEC_SHL:
2798 case SPUISD::VEC_SRL:
2799 case SPUISD::VEC_SRA:
2800 case SPUISD::ROTBYTES_LEFT: {
2801 SDValue Op1 = N->getOperand(1);
2803 // Kill degenerate vector shifts:
2804 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2805 if (CN->isNullValue()) {
2806 Result = Op0;
2807 }
2808 }
2809 break;
2810 }
2811 case SPUISD::PREFSLOT2VEC: {
2812 switch (Op0.getOpcode()) {
2813 default:
2814 break;
2815 case ISD::ANY_EXTEND:
2816 case ISD::ZERO_EXTEND:
2817 case ISD::SIGN_EXTEND: {
2818 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2819 // <arg>
2820 // but only if the SPUprefslot2vec and <arg> types match.
2821 SDValue Op00 = Op0.getOperand(0);
2822 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2823 SDValue Op000 = Op00.getOperand(0);
2824 if (Op000.getValueType() == NodeVT) {
2825 Result = Op000;
2826 }
2827 }
2828 break;
2829 }
2830 case SPUISD::VEC2PREFSLOT: {
2831 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2832 // <arg>
2833 Result = Op0.getOperand(0);
2834 break;
2835 }
2836 }
2837 break;
2838 }
2839 }
2841 // Otherwise, return unchanged.
2842 #ifndef NDEBUG
2843 if (Result.getNode()) {
2844 DEBUG(cerr << "\nReplace.SPU: ");
2845 DEBUG(N->dump(&DAG));
2846 DEBUG(cerr << "\nWith: ");
2847 DEBUG(Result.getNode()->dump(&DAG));
2848 DEBUG(cerr << "\n");
2849 }
2850 #endif
2852 return Result;
2853 }
2855 //===----------------------------------------------------------------------===//
2856 // Inline Assembly Support
2857 //===----------------------------------------------------------------------===//
2859 /// getConstraintType - Given a constraint letter, return the type of
2860 /// constraint it is for this target.
2861 SPUTargetLowering::ConstraintType
2862 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2863 if (ConstraintLetter.size() == 1) {
2864 switch (ConstraintLetter[0]) {
2865 default: break;
2866 case 'b':
2867 case 'r':
2868 case 'f':
2869 case 'v':
2870 case 'y':
2871 return C_RegisterClass;
2872 }
2873 }
2874 return TargetLowering::getConstraintType(ConstraintLetter);
2875 }
2877 std::pair<unsigned, const TargetRegisterClass*>
2878 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2879 MVT VT) const
2880 {
2881 if (Constraint.size() == 1) {
2882 // GCC constraint letters (scheme borrowed from the RS6000/PowerPC backend)
2883 switch (Constraint[0]) {
2884 case 'b': // R1-R31
2885 case 'r': // R0-R31
2886 if (VT == MVT::i64)
2887 return std::make_pair(0U, SPU::R64CRegisterClass);
2888 return std::make_pair(0U, SPU::R32CRegisterClass);
2889 case 'f':
2890 if (VT == MVT::f32)
2891 return std::make_pair(0U, SPU::R32FPRegisterClass);
2892 else if (VT == MVT::f64)
2893 return std::make_pair(0U, SPU::R64FPRegisterClass);
2894 break;
2895 case 'v':
2896 return std::make_pair(0U, SPU::GPRCRegisterClass);
2897 }
2898 }
2900 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2901 }
2903 //! Compute used/known bits for a SPU operand
2904 void
2905 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
2906 const APInt &Mask,
2907 APInt &KnownZero,
2908 APInt &KnownOne,
2909 const SelectionDAG &DAG,
2910 unsigned Depth ) const {
2911 #if 0
2912 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
2914 switch (Op.getOpcode()) {
2915 default:
2916 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
2917 break;
2918 case CALL:
2919 case SHUFB:
2920 case SHUFFLE_MASK:
2921 case CNTB:
2922 case SPUISD::PREFSLOT2VEC:
2923 case SPUISD::LDRESULT:
2924 case SPUISD::VEC2PREFSLOT:
2925 case SPUISD::SHLQUAD_L_BITS:
2926 case SPUISD::SHLQUAD_L_BYTES:
2927 case SPUISD::VEC_SHL:
2928 case SPUISD::VEC_SRL:
2929 case SPUISD::VEC_SRA:
2930 case SPUISD::VEC_ROTL:
2931 case SPUISD::VEC_ROTR:
2932 case SPUISD::ROTBYTES_LEFT:
2933 case SPUISD::SELECT_MASK:
2934 case SPUISD::SELB:
2936 #endif
2937 }
2939 unsigned
2940 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
2941 unsigned Depth) const {
2942 switch (Op.getOpcode()) {
2943 default:
2944 return 1;
2946 case ISD::SETCC: {
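// This backend materializes setcc results as all-zeros or all-ones, so
// every bit of the result replicates the sign bit.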
2947 MVT VT = Op.getValueType();
2949 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
2950 VT = MVT::i32;
2951 }
2952 return VT.getSizeInBits();
2953 }
2954 }
2955 }
2957 // LowerAsmOperandForConstraint
2958 void
2959 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
2960 char ConstraintLetter,
2961 bool hasMemory,
2962 std::vector<SDValue> &Ops,
2963 SelectionDAG &DAG) const {
2964 // Default, for the time being, to the base class handler
2965 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
2966 Ops, DAG);
2967 }
2969 /// isLegalAddressImmediate - Return true if the integer value can be used
2970 /// as the offset of the target addressing mode.
2971 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
2972 const Type *Ty) const {
2973 // SPU's local store is 256K, so immediate addresses lie within a signed 18-bit range:
2974 return (V > -(1 << 18) && V < (1 << 18) - 1);
2975 }
2977 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
2978 return false;
2979 }
2981 bool
2982 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
2983 // The SPU target isn't yet aware of offsets.
2984 return false;
2985 }