//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT valtype;
    int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

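  // Illustration: prefslot_byte is the byte offset at which a scalar of the
  // given type sits within the 128-bit register's "preferred slot". An i8
  // lives at byte 3, an i16 at bytes 2..3, and 32-bit or wider types start
  // at byte 0; the load/store lowering below rotates 16-byte blocks so that
  // elements land in this slot.
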
  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
      Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
      TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                      0, TLI.getLibcallCallingConv(LC), false,
                      /*isReturnValueUsed=*/true,
                      Callee, Args, DAG,
                      Op.getDebugLoc());

    return CallInfo.first;
  }
}

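// Usage sketch (illustrative, not a call site from this file): a lowering
// routine would expand an unsupported f64 operation into a runtime call,
// e.g.
//   SDValue Dummy;
//   SDValue Res = ExpandLibCall(RTLIB::FPTOSINT_F64_I32, Op, DAG,
//                               /*isSigned=*/true, Dummy, TLI);
// which builds the call DAG nodes through TLI.LowerCallTo above.
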
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Don't transform signed divides by a power of two into shift sequences;
  // let the target lower them itself.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8,  Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,     MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,  Custom);
  setOperationAction(ISD::SRL,  MVT::i8,  Custom);
  setOperationAction(ISD::SRA,  MVT::i8,  Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64, Legal);
  setOperationAction(ISD::SRL,  MVT::i64, Legal);
  setOperationAction(ISD::SRA,  MVT::i64, Legal);

  // i8 multiply is custom lowered; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL,  MVT::i8,  Custom);
  setOperationAction(ISD::MUL,  MVT::i32, Legal);
  setOperationAction(ISD::MUL,  MVT::i64, Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Custom lower the common i8 math ops; the i64 versions are legal
  // (selected via instruction patterns):
  setOperationAction(ISD::ADD,  MVT::i8,  Custom);
  setOperationAction(ISD::ADD,  MVT::i64, Legal);
  setOperationAction(ISD::SUB,  MVT::i8,  Custom);
  setOperationAction(ISD::SUB,  MVT::i64, Legal);

  // SPU does not have BSWAP. It does support CTLZ for i32; CTPOP has to be
  // custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,  Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,  Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i128, Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTTZ , MVT::i128, Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,  Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,  Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,  Expand);
  setOperationAction(ISD::CTLZ , MVT::i128, Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64 it
  // needs to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32,  Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64,  Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,    VT, Legal);
    setOperationAction(ISD::SUB,    VT, Legal);
    // mul is also legal for all supported vector VT's.
    setOperationAction(ISD::MUL,    VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,   VT, Expand);
    setOperationAction(ISD::SREM,   VT, Expand);
    setOperationAction(ISD::UDIV,   VT, Expand);
    setOperationAction(ISD::UREM,   VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
           VT.getSimpleVT().SimpleTy :
           MVT::i32);
}

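// For example, a SETCC comparing two i16 values produces an i16 boolean
// (all ones or all zeroes, per ZeroOrNegativeOneBooleanContent above), while
// i64 or f64 comparisons produce their result in an i32.
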
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/

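// Worked example (illustrative): an aligned i16 load at byte offset 6 within
// its 16-byte block computes rotamt = (6 & 0xf) - prefslot_byte(i16)
//                                   = 6 - 2 = 4,
// so the quadword is rotated left by 4 bytes and the halfword lands in the
// preferred slot (bytes 2..3) before VEC2PREFSLOT extracts it.
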
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */

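// The emitted sequence is, in sketch form:
//   %1 v16i8,ch = load                  ; containing 16-byte block
//   %2 vecVT    = shuffle_mask %offs    ; insertion mask for the element
//   %3 vecVT    = shufb %value, %1, %2  ; splice the new element in
//   %4 ch       = store %3              ; write the block back
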
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      errs() << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      errs() << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

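// Note (illustrative): in small-memory mode the pool address fits an A-form
// immediate (a single load-address instruction such as ila), whereas
// large-memory mode splits it into SPUhi/SPUlo halves (ilhu/iohl pairs)
// joined through an indirect address. The same pattern recurs for jump
// tables and globals below.
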
//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

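// For example, the f64 constant 1.0 (bit pattern 0x3ff0000000000000) becomes
//   (vec2prefslot (bitconvert v2f64 (build_vector i64 C, C)))
// so the value is materialized from integer immediates rather than loaded
// from the constant pool.
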
SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

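// Note (inferred from the spill-count comment above): the SPU ABI passes the
// first NumArgRegs arguments in registers R3..R79, i.e. (79-3)+1 = 77 slots;
// a vararg function spills whatever remains of that range into 16-byte stack
// slots so va_arg can walk them uniformly.
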
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

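// For example (illustrative): address 0x400 is word-aligned and survives the
// 18-bit sign-extension check, so it is encoded as the immediate
// 0x400 >> 2 = 0x100; address 0x402 (misaligned) or anything with more than
// 18 significant bits yields 0.
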
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) {

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = Outs[i].Val;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

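// Per the switch above, every scalar and vector return value comes back in
// R3; only the two-register i32 pair case also uses R4.
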
SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               DebugLoc dl, SelectionDAG &DAG) {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Outs[i].Val, Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

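// e.g. (build_vector 5, 5, undef, 5) yields the ConstantSDNode for 5, while
// (build_vector 5, 6, 5, 5) yields 0 because its elements differ.
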
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

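// e.g. a v4i32 splat of 0x20000 fits the unsigned 18-bit range (max 0x3ffff)
// and is returned as a target constant, whereas a splat of 0x40000 is not.
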
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDValue();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same
/// value, where that value has its lower 16 bits zero (i.e. an ILHU
/// upper-halfword immediate); if so, return the constant shifted down 16 bits
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             EVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

1549 /// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1550 SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1551 if (ConstantSDNode *CN = getVecImm(N)) {
1552 return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1555 return SDValue();
1558 //! Lower a BUILD_VECTOR instruction creatively:
1559 static SDValue
1560 LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561 EVT VT = Op.getValueType();
1562 EVT EltVT = VT.getVectorElementType();
1563 DebugLoc dl = Op.getDebugLoc();
1564 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1565 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1566 unsigned minSplatBits = EltVT.getSizeInBits();
1568 if (minSplatBits < 16)
1569 minSplatBits = 16;
1571 APInt APSplatBits, APSplatUndef;
1572 unsigned SplatBitSize;
1573 bool HasAnyUndefs;
1575 if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1576 HasAnyUndefs, minSplatBits)
1577 || minSplatBits < SplatBitSize)
1578 return SDValue(); // Wasn't a constant vector or splat exceeded min
1580 uint64_t SplatBits = APSplatBits.getZExtValue();
1582 switch (VT.getSimpleVT().SimpleTy) {
1583 default: {
1584 std::string msg;
1585 raw_string_ostream Msg(msg);
1586 Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1587 << VT.getEVTString();
1588 llvm_report_error(Msg.str());
1589 /*NOTREACHED*/
1591 case MVT::v4f32: {
1592 uint32_t Value32 = uint32_t(SplatBits);
1593 assert(SplatBitSize == 32
1594 && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596 SDValue T = DAG.getConstant(Value32, MVT::i32);
1597 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1598 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1599 break;
1601 case MVT::v2f64: {
1602 uint64_t f64val = uint64_t(SplatBits);
1603 assert(SplatBitSize == 64
1604 && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605 // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606 SDValue T = DAG.getConstant(f64val, MVT::i64);
1607 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1608 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1609 break;
1611 case MVT::v16i8: {
1612 // 8-bit constants have to be expanded to 16 bits
1613 unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1614 SmallVector<SDValue, 8> Ops;
1616 Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1617 return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1618 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1620 case MVT::v8i16: {
1621 unsigned short Value16 = SplatBits;
1622 SDValue T = DAG.getConstant(Value16, EltVT);
1623 SmallVector<SDValue, 8> Ops;
1625 Ops.assign(8, T);
1626 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1628 case MVT::v4i32: {
1629 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1630 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1632 case MVT::v2i32: {
1633 SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1634 return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1636 case MVT::v2i64: {
1637 return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1641 return SDValue();
1646 SDValue
1647 SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1648 DebugLoc dl) {
1649 uint32_t upper = uint32_t(SplatVal >> 32);
1650 uint32_t lower = uint32_t(SplatVal);
1652 if (upper == lower) {
1653 // Magic constant that can be matched by IL, ILA, et al.
1654 SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1655 return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1656 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1657 Val, Val, Val, Val));
1658 } else {
1659 bool upper_special, lower_special;
1661 // NOTE: This code creates common-case shuffle masks that can be easily
1662 // detected as common expressions. It is not attempting to create highly
1663 // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1665 // Detect if the upper or lower half is a special shuffle mask pattern:
1666 upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1667 lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1669 // Both upper and lower are special, lower to a constant pool load:
1670 if (lower_special && upper_special) {
1671 SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1672 return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1673 SplatValCN, SplatValCN);
1676 SDValue LO32;
1677 SDValue HI32;
1678 SmallVector<SDValue, 16> ShufBytes;
1679 SDValue Result;
1681 // Create lower vector if not a special pattern
1682 if (!lower_special) {
1683 SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1684 LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686 LO32C, LO32C, LO32C, LO32C));
1689 // Create upper vector if not a special pattern
1690 if (!upper_special) {
1691 SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1692 HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1693 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1694 HI32C, HI32C, HI32C, HI32C));
1697 // If either upper or lower are special, then the two input operands are
1698 // the same (basically, one of them is a "don't care")
1699 if (lower_special)
1700 LO32 = HI32;
1701 if (upper_special)
1702 HI32 = LO32;
1704 for (int i = 0; i < 4; ++i) {
1705 uint64_t val = 0;
1706 for (int j = 0; j < 4; ++j) {
1707 SDValue V;
1708 bool process_upper, process_lower;
1709 val <<= 8;
1710 process_upper = (upper_special && (i & 1) == 0);
1711 process_lower = (lower_special && (i & 1) == 1);
1713 if (process_upper || process_lower) {
1714 if ((process_upper && upper == 0)
1715 || (process_lower && lower == 0))
1716 val |= 0x80;
1717 else if ((process_upper && upper == 0xffffffff)
1718 || (process_lower && lower == 0xffffffff))
1719 val |= 0xc0;
1720 else if ((process_upper && upper == 0x80000000)
1721 || (process_lower && lower == 0x80000000))
1722 val |= (j == 0 ? 0xe0 : 0x80);
1723 } else
1724 val |= i * 4 + j + ((i & 1) * 16);
1727 ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1730 return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1731 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732 &ShufBytes[0], ShufBytes.size()));
1736 /// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1737 /// which the Cell can operate. The code inspects V3 to ascertain whether the
1738 /// permutation vector, V3, is monotonically increasing with one "exception"
1739 /// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1740 /// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1741 /// In either case, the net result is going to eventually invoke SHUFB to
1742 /// permute/shuffle the bytes from V1 and V2.
1743 /// \note
1744 /// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1745 /// generate a control word for byte/halfword/word insertion. This takes care
1746 /// of a single element move from V2 into V1.
1747 /// \note
1748 /// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1749 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1750 const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1751 SDValue V1 = Op.getOperand(0);
1752 SDValue V2 = Op.getOperand(1);
1753 DebugLoc dl = Op.getDebugLoc();
1755 if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1757 // If we have a single element being moved from V1 to V2, this can be handled
1758 // using the C*[DX] compute mask instructions, but the vector elements have
1759 // to be monotonically increasing with one exception element.
1760 EVT VecVT = V1.getValueType();
1761 EVT EltVT = VecVT.getVectorElementType();
1762 unsigned EltsFromV2 = 0;
1763 unsigned V2Elt = 0;
1764 unsigned V2EltIdx0 = 0;
1765 unsigned CurrElt = 0;
1766 unsigned MaxElts = VecVT.getVectorNumElements();
1767 unsigned PrevElt = 0;
1768 unsigned V0Elt = 0;
1769 bool monotonic = true;
1770 bool rotate = true;
1772 if (EltVT == MVT::i8) {
1773 V2EltIdx0 = 16;
1774 } else if (EltVT == MVT::i16) {
1775 V2EltIdx0 = 8;
1776 } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1777 V2EltIdx0 = 4;
1778 } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1779 V2EltIdx0 = 2;
1780 } else
1781 llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1783 for (unsigned i = 0; i != MaxElts; ++i) {
1784 if (SVN->getMaskElt(i) < 0)
1785 continue;
1787 unsigned SrcElt = SVN->getMaskElt(i);
1789 if (monotonic) {
1790 if (SrcElt >= V2EltIdx0) {
1791 if (++EltsFromV2 <= 1) {
1792 V2Elt = (V2EltIdx0 - SrcElt) << 2;
1794 } else if (CurrElt != SrcElt) {
1795 monotonic = false;
1798 ++CurrElt;
1801 if (rotate) {
1802 if (PrevElt > 0 && SrcElt < MaxElts) {
1803 if ((PrevElt == SrcElt - 1)
1804 || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805 PrevElt = SrcElt;
1806 if (SrcElt == 0)
1807 V0Elt = i;
1808 } else {
1809 rotate = false;
1811 } else if (PrevElt == 0) {
1812 // First time through, need to keep track of previous element
1813 PrevElt = SrcElt;
1814 } else {
1815 // This isn't a rotation; it takes elements from vector 2
1816 rotate = false;
1821 if (EltsFromV2 == 1 && monotonic) {
1822 // Compute mask and shuffle
1823 MachineFunction &MF = DAG.getMachineFunction();
1824 MachineRegisterInfo &RegInfo = MF.getRegInfo();
1825 unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1826 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827 // Initialize temporary register to 0
1828 SDValue InitTempReg =
1829 DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1830 // Copy register's contents as index in SHUFFLE_MASK:
1831 SDValue ShufMaskOp =
1832 DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1833 DAG.getTargetConstant(V2Elt, MVT::i32),
1834 DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1835 // Use shuffle mask in SHUFB synthetic instruction:
1836 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1837 ShufMaskOp);
1838 } else if (rotate) {
1839 int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1841 return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1842 V1, DAG.getConstant(rotamt, MVT::i16));
1843 } else {
1844 // Convert the SHUFFLE_VECTOR mask's input element units to the
1845 // actual bytes.
1846 unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1848 SmallVector<SDValue, 16> ResultMask;
1849 for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1850 unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1852 for (unsigned j = 0; j < BytesPerElement; ++j)
1853 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1856 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1857 &ResultMask[0], ResultMask.size());
1858 return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
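// Illustrative examples: a v4i32 mask <0, 1, 6, 3> pulls exactly one element
// (index 6, V2's element 2) from V2 with the rest monotonic, so the
// SHUFFLE_MASK/SHUFB path above performs a single-element insertion. A mask
// of <1, 2, 3, 0> is a pure rotation: V0Elt = 3, so
// rotamt = (4 - 3) * 4 = 4 bytes and one ROTBYTES_LEFT suffices.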
1862 static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1863 SDValue Op0 = Op.getOperand(0); // Op0 = the scalar
1864 DebugLoc dl = Op.getDebugLoc();
1866 if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867 // For a constant, build the appropriate constant vector, which will
1868 // eventually simplify to a vector register load.
1870 ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871 SmallVector<SDValue, 16> ConstVecValues;
1872 EVT VT;
1873 size_t n_copies;
1875 // Create a constant vector:
1876 switch (Op.getValueType().getSimpleVT().SimpleTy) {
1877 default: llvm_unreachable("Unexpected constant value type in "
1878 "LowerSCALAR_TO_VECTOR");
1879 case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880 case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881 case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882 case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883 case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884 case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1887 SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888 for (size_t j = 0; j < n_copies; ++j)
1889 ConstVecValues.push_back(CValue);
1891 return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1892 &ConstVecValues[0], ConstVecValues.size());
1893 } else {
1894 // Otherwise, copy the value from one register to another:
1895 switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1896 default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1897 case MVT::i8:
1898 case MVT::i16:
1899 case MVT::i32:
1900 case MVT::i64:
1901 case MVT::f32:
1902 case MVT::f64:
1903 return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1907 return SDValue();
1910 static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1911 EVT VT = Op.getValueType();
1912 SDValue N = Op.getOperand(0);
1913 SDValue Elt = Op.getOperand(1);
1914 DebugLoc dl = Op.getDebugLoc();
1915 SDValue retval;
1917 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1918 // Constant argument:
1919 int EltNo = (int) C->getZExtValue();
1921 // sanity checks:
1922 if (VT == MVT::i8 && EltNo >= 16)
1923 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1924 else if (VT == MVT::i16 && EltNo >= 8)
1925 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1926 else if (VT == MVT::i32 && EltNo >= 4)
1927 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
1928 else if (VT == MVT::i64 && EltNo >= 2)
1929 llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1931 if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1932 // i32 and i64: Element 0 is the preferred slot
1933 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1936 // Need to generate shuffle mask and extract:
1937 int prefslot_begin = -1, prefslot_end = -1;
1938 int elt_byte = EltNo * VT.getSizeInBits() / 8;
1940 switch (VT.getSimpleVT().SimpleTy) {
1941 default:
1942 assert(false && "Invalid value type!");
1943 case MVT::i8: {
1944 prefslot_begin = prefslot_end = 3;
1945 break;
1947 case MVT::i16: {
1948 prefslot_begin = 2; prefslot_end = 3;
1949 break;
1951 case MVT::i32:
1952 case MVT::f32: {
1953 prefslot_begin = 0; prefslot_end = 3;
1954 break;
1956 case MVT::i64:
1957 case MVT::f64: {
1958 prefslot_begin = 0; prefslot_end = 7;
1959 break;
1963 assert(prefslot_begin != -1 && prefslot_end != -1 &&
1964 "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1966 unsigned int ShufBytes[16] = {
1967 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1969 for (int i = 0; i < 16; ++i) {
1970 // zero fill upper part of preferred slot, don't care about the
1971 // other slots:
1972 unsigned int mask_val;
1973 if (i <= prefslot_end) {
1974 mask_val =
1975 ((i < prefslot_begin)
1976 ? 0x80
1977 : elt_byte + (i - prefslot_begin));
1979 ShufBytes[i] = mask_val;
1980 } else
1981 ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1984 SDValue ShufMask[4];
1985 for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1986 unsigned bidx = i * 4;
1987 unsigned int bits = ((ShufBytes[bidx] << 24) |
1988 (ShufBytes[bidx+1] << 16) |
1989 (ShufBytes[bidx+2] << 8) |
1990 ShufBytes[bidx+3]);
1991 ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1994 SDValue ShufMaskVec =
1995 DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1996 &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1998 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1999 DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
2000 N, N, ShufMaskVec));
2001 } else {
2002 // Variable index: Rotate the requested element into slot 0, then replicate
2003 // slot 0 across the vector
2004 EVT VecVT = N.getValueType();
2005 if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2006 llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
2007 "vector type!");
2010 // Make life easier by making sure the index is zero-extended to i32
2011 if (Elt.getValueType() != MVT::i32)
2012 Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2014 // Scale the index to a bit/byte shift quantity
2015 APInt scaleFactor =
2016 APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2017 unsigned scaleShift = scaleFactor.logBase2();
2018 SDValue vecShift;
2020 if (scaleShift > 0) {
2021 // Scale the shift factor:
2022 Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2023 DAG.getConstant(scaleShift, MVT::i32));
2026 vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2028 // Replicate the bytes starting at byte 0 across the entire vector (for
2029 // consistency with the notion of a unified register set)
2030 SDValue replicate;
2032 switch (VT.getSimpleVT().SimpleTy) {
2033 default:
2034 llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
2035 "type");
2036 /*NOTREACHED*/
2037 case MVT::i8: {
2038 SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2039 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2040 factor, factor, factor, factor);
2041 break;
2043 case MVT::i16: {
2044 SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2045 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2046 factor, factor, factor, factor);
2047 break;
2049 case MVT::i32:
2050 case MVT::f32: {
2051 SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2052 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2053 factor, factor, factor, factor);
2054 break;
2056 case MVT::i64:
2057 case MVT::f64: {
2058 SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2059 SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2060 replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2061 loFactor, hiFactor, loFactor, hiFactor);
2062 break;
2066 retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2067 DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2068 vecShift, vecShift, replicate));
2071 return retval;
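// Illustrative example: extracting element 2 of a v4i32 by constant index
// gives elt_byte = 8 with prefslot_begin = 0 and prefslot_end = 3, so
// ShufBytes holds 8, 9, 10, 11 repeated across the quadword (control words
// 0x08090a0b x 4): SHUFB moves element 2 into the preferred slot and
// VEC2PREFSLOT reads it out as the scalar result.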
2074 static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2075 SDValue VecOp = Op.getOperand(0);
2076 SDValue ValOp = Op.getOperand(1);
2077 SDValue IdxOp = Op.getOperand(2);
2078 DebugLoc dl = Op.getDebugLoc();
2079 EVT VT = Op.getValueType();
2081 ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2082 assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2084 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2085 // Use $sp ($1) because it's always 16-byte aligned and it's available:
2086 SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2087 DAG.getRegister(SPU::R1, PtrVT),
2088 DAG.getConstant(CN->getSExtValue(), PtrVT));
2089 SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2091 SDValue result =
2092 DAG.getNode(SPUISD::SHUFB, dl, VT,
2093 DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2094 VecOp,
2095 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2097 return result;
2100 static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2101 const TargetLowering &TLI)
2103 SDValue N0 = Op.getOperand(0); // Everything has at least one operand
2104 DebugLoc dl = Op.getDebugLoc();
2105 EVT ShiftVT = TLI.getShiftAmountTy();
2107 assert(Op.getValueType() == MVT::i8);
2108 switch (Opc) {
2109 default:
2110 llvm_unreachable("Unhandled i8 math operator");
2111 /*NOTREACHED*/
2112 break;
2113 case ISD::ADD: {
2114 // 8-bit addition: Promote the arguments up to 16-bits and truncate
2115 // the result:
2116 SDValue N1 = Op.getOperand(1);
2117 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2118 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2119 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2120 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2124 case ISD::SUB: {
2125 // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2126 // the result:
2127 SDValue N1 = Op.getOperand(1);
2128 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2129 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2130 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2131 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2133 case ISD::ROTR:
2134 case ISD::ROTL: {
2135 SDValue N1 = Op.getOperand(1);
2136 EVT N1VT = N1.getValueType();
2138 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2139 if (!N1VT.bitsEq(ShiftVT)) {
2140 unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2141 ? ISD::ZERO_EXTEND
2142 : ISD::TRUNCATE;
2143 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2146 // Replicate lower 8-bits into upper 8:
2147 SDValue ExpandArg =
2148 DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2149 DAG.getNode(ISD::SHL, dl, MVT::i16,
2150 N0, DAG.getConstant(8, MVT::i32)));
2152 // Truncate back down to i8
2153 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2154 DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2156 case ISD::SRL:
2157 case ISD::SHL: {
2158 SDValue N1 = Op.getOperand(1);
2159 EVT N1VT = N1.getValueType();
2161 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2162 if (!N1VT.bitsEq(ShiftVT)) {
2163 unsigned N1Opc = ISD::ZERO_EXTEND;
2165 if (N1.getValueType().bitsGT(ShiftVT))
2166 N1Opc = ISD::TRUNCATE;
2168 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2171 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2174 case ISD::SRA: {
2175 SDValue N1 = Op.getOperand(1);
2176 EVT N1VT = N1.getValueType();
2178 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2179 if (!N1VT.bitsEq(ShiftVT)) {
2180 unsigned N1Opc = ISD::SIGN_EXTEND;
2182 if (N1VT.bitsGT(ShiftVT))
2183 N1Opc = ISD::TRUNCATE;
2184 N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2187 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2190 case ISD::MUL: {
2191 SDValue N1 = Op.getOperand(1);
2193 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2194 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2195 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2196 DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2197 break;
2201 return SDValue();
2204 //! Lower byte immediate operations for v16i8 vectors:
2205 static SDValue
2206 LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2207 SDValue ConstVec;
2208 SDValue Arg;
2209 EVT VT = Op.getValueType();
2210 DebugLoc dl = Op.getDebugLoc();
2212 ConstVec = Op.getOperand(0);
2213 Arg = Op.getOperand(1);
2214 if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2215 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2216 ConstVec = ConstVec.getOperand(0);
2217 } else {
2218 ConstVec = Op.getOperand(1);
2219 Arg = Op.getOperand(0);
2220 if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2221 ConstVec = ConstVec.getOperand(0);
2226 if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2227 BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2228 assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2230 APInt APSplatBits, APSplatUndef;
2231 unsigned SplatBitSize;
2232 bool HasAnyUndefs;
2233 unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2235 if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2236 HasAnyUndefs, minSplatBits)
2237 && minSplatBits <= SplatBitSize) {
2238 uint64_t SplatBits = APSplatBits.getZExtValue();
2239 SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2241 SmallVector<SDValue, 16> tcVec;
2242 tcVec.assign(16, tc);
2243 return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2244 DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2248 // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2249 // lowered. Return the operation, rather than a null SDValue.
2250 return Op;
2253 //! Custom lowering for CTPOP (count population)
2255 Custom lowering code that counts the number of ones in the input
2256 operand. SPU has such an instruction, but it counts the number of
2257 ones per byte, which then have to be accumulated.
2259 static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2260 EVT VT = Op.getValueType();
2261 EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2262 VT, (128 / VT.getSizeInBits()));
2263 DebugLoc dl = Op.getDebugLoc();
2265 switch (VT.getSimpleVT().SimpleTy) {
2266 default:
2267 assert(false && "Invalid value type!");
2268 case MVT::i8: {
2269 SDValue N = Op.getOperand(0);
2270 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2272 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2273 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2275 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2278 case MVT::i16: {
2279 MachineFunction &MF = DAG.getMachineFunction();
2280 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2282 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2284 SDValue N = Op.getOperand(0);
2285 SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2286 SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2287 SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2289 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2290 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2292 // CNTB_result becomes the chain to which all of the virtual registers
2293 // CNTB_reg, SUM1_reg become associated:
2294 SDValue CNTB_result =
2295 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2297 SDValue CNTB_rescopy =
2298 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2300 SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2302 return DAG.getNode(ISD::AND, dl, MVT::i16,
2303 DAG.getNode(ISD::ADD, dl, MVT::i16,
2304 DAG.getNode(ISD::SRL, dl, MVT::i16,
2305 Tmp1, Shift1),
2306 Tmp1),
2307 Mask0);
2310 case MVT::i32: {
2311 MachineFunction &MF = DAG.getMachineFunction();
2312 MachineRegisterInfo &RegInfo = MF.getRegInfo();
2314 unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2315 unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2317 SDValue N = Op.getOperand(0);
2318 SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2319 SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2320 SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2321 SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2323 SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2324 SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2326 // CNTB_result becomes the chain to which all of the virtual registers
2327 // CNTB_reg, SUM1_reg become associated:
2328 SDValue CNTB_result =
2329 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2331 SDValue CNTB_rescopy =
2332 DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2334 SDValue Comp1 =
2335 DAG.getNode(ISD::SRL, dl, MVT::i32,
2336 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2337 Shift1);
2339 SDValue Sum1 =
2340 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2341 DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2343 SDValue Sum1_rescopy =
2344 DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2346 SDValue Comp2 =
2347 DAG.getNode(ISD::SRL, dl, MVT::i32,
2348 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2349 Shift2);
2350 SDValue Sum2 =
2351 DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2352 DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2354 return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2357 case MVT::i64:
2358 break;
2361 return SDValue();
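// Illustrative walk-through of the i32 case above: CNTB leaves per-byte
// counts c3..c0 (each <= 8) in the word. Sum1 adds counts 16 bits apart
// (low byte becomes c0 + c2), Sum2 adds bytes 8 bits apart (low byte becomes
// c0 + c1 + c2 + c3 <= 32), and the final AND with 0xff extracts the total
// population count.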
2364 //! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2366 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2367 All conversions to i64 are expanded to a libcall.
2369 static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2370 SPUTargetLowering &TLI) {
2371 EVT OpVT = Op.getValueType();
2372 SDValue Op0 = Op.getOperand(0);
2373 EVT Op0VT = Op0.getValueType();
2375 if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2376 || OpVT == MVT::i64) {
2377 // Convert f32 / f64 to i32 / i64 via libcall.
2378 RTLIB::Libcall LC =
2379 (Op.getOpcode() == ISD::FP_TO_SINT)
2380 ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2381 : RTLIB::getFPTOUINT(Op0VT, OpVT);
2382 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2383 SDValue Dummy;
2384 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2387 return Op;
2390 //! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2392 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2393 All conversions from i64 are expanded to a libcall.
2395 static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2396 SPUTargetLowering &TLI) {
2397 EVT OpVT = Op.getValueType();
2398 SDValue Op0 = Op.getOperand(0);
2399 EVT Op0VT = Op0.getValueType();
2401 if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2402 || Op0VT == MVT::i64) {
2403 // Convert i32, i64 to f64 via libcall:
2404 RTLIB::Libcall LC =
2405 (Op.getOpcode() == ISD::SINT_TO_FP)
2406 ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2407 : RTLIB::getUINTTOFP(Op0VT, OpVT);
2408 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2409 SDValue Dummy;
2410 return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2413 return Op;
2416 //! Lower ISD::SETCC
2418 This handles MVT::f64 (double floating point) condition lowering
2420 static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2421 const TargetLowering &TLI) {
2422 CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2423 DebugLoc dl = Op.getDebugLoc();
2424 assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2426 SDValue lhs = Op.getOperand(0);
2427 SDValue rhs = Op.getOperand(1);
2428 EVT lhsVT = lhs.getValueType();
2429 assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2431 EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2432 APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2433 EVT IntVT(MVT::i64);
2435 // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
2436 // selected to a NOP:
2437 SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2438 SDValue lhsHi32 =
2439 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2440 DAG.getNode(ISD::SRL, dl, IntVT,
2441 i64lhs, DAG.getConstant(32, MVT::i32)));
2442 SDValue lhsHi32abs =
2443 DAG.getNode(ISD::AND, dl, MVT::i32,
2444 lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2445 SDValue lhsLo32 =
2446 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2448 // SETO and SETUO only use the lhs operand:
2449 if (CC->get() == ISD::SETO) {
2450 // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2451 // SETUO
2452 APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2453 return DAG.getNode(ISD::XOR, dl, ccResultVT,
2454 DAG.getSetCC(dl, ccResultVT,
2455 lhs, DAG.getConstantFP(0.0, lhsVT),
2456 ISD::SETUO),
2457 DAG.getConstant(ccResultAllOnes, ccResultVT));
2458 } else if (CC->get() == ISD::SETUO) {
2459 // Evaluates to true if Op0 is [SQ]NaN
2460 return DAG.getNode(ISD::AND, dl, ccResultVT,
2461 DAG.getSetCC(dl, ccResultVT,
2462 lhsHi32abs,
2463 DAG.getConstant(0x7ff00000, MVT::i32),
2464 ISD::SETGE),
2465 DAG.getSetCC(dl, ccResultVT,
2466 lhsLo32,
2467 DAG.getConstant(0, MVT::i32),
2468 ISD::SETGT));
2471 SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2472 SDValue rhsHi32 =
2473 DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2474 DAG.getNode(ISD::SRL, dl, IntVT,
2475 i64rhs, DAG.getConstant(32, MVT::i32)));
2477 // If a value is negative, subtract from the sign magnitude constant:
2478 SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2480 // Convert the sign-magnitude representation into 2's complement:
2481 SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2482 lhsHi32, DAG.getConstant(31, MVT::i32));
2483 SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2484 SDValue lhsSelect =
2485 DAG.getNode(ISD::SELECT, dl, IntVT,
2486 lhsSelectMask, lhsSignMag2TC, i64lhs);
2488 SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2489 rhsHi32, DAG.getConstant(31, MVT::i32));
2490 SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2491 SDValue rhsSelect =
2492 DAG.getNode(ISD::SELECT, dl, IntVT,
2493 rhsSelectMask, rhsSignMag2TC, i64rhs);
2495 unsigned compareOp;
2497 switch (CC->get()) {
2498 case ISD::SETOEQ:
2499 case ISD::SETUEQ:
2500 compareOp = ISD::SETEQ; break;
2501 case ISD::SETOGT:
2502 case ISD::SETUGT:
2503 compareOp = ISD::SETGT; break;
2504 case ISD::SETOGE:
2505 case ISD::SETUGE:
2506 compareOp = ISD::SETGE; break;
2507 case ISD::SETOLT:
2508 case ISD::SETULT:
2509 compareOp = ISD::SETLT; break;
2510 case ISD::SETOLE:
2511 case ISD::SETULE:
2512 compareOp = ISD::SETLE; break;
2513 case ISD::SETUNE:
2514 case ISD::SETONE:
2515 compareOp = ISD::SETNE; break;
2516 default:
2517 llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2520 SDValue result =
2521 DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2522 (ISD::CondCode) compareOp);
2524 if ((CC->get() & 0x8) == 0) {
2525 // Ordered comparison:
2526 SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2527 lhs, DAG.getConstantFP(0.0, MVT::f64),
2528 ISD::SETO);
2529 SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2530 rhs, DAG.getConstantFP(0.0, MVT::f64),
2531 ISD::SETO);
2532 SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2534 result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2537 return result;
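// Illustrative note on the sign-magnitude trick above: IEEE doubles order
// like sign-magnitude integers, so negative values are remapped via
// 0x8000000000000000 - bits(x). For example, bits(-1.0) =
// 0xbff0000000000000 maps to 0xc010000000000000, which correctly orders
// below bits(+1.0) = 0x3ff0000000000000 under a signed i64 comparison.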
2540 //! Lower ISD::SELECT_CC
2542 ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2543 SELB instruction.
2545 \note Need to revisit this in the future: if the code path through the true
2546 and false value computations is longer than the latency of a branch (6
2547 cycles), then it would be more advantageous to insert a new basic block
2548 and branch on the condition. However, this code does not make that
2549 assumption, given the simplistic uses so far.
2552 static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2553 const TargetLowering &TLI) {
2554 EVT VT = Op.getValueType();
2555 SDValue lhs = Op.getOperand(0);
2556 SDValue rhs = Op.getOperand(1);
2557 SDValue trueval = Op.getOperand(2);
2558 SDValue falseval = Op.getOperand(3);
2559 SDValue condition = Op.getOperand(4);
2560 DebugLoc dl = Op.getDebugLoc();
2562 // NOTE: SELB's arguments: $rA, $rB, $mask
2564 // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2565 // where bits in $mask are 1. CCond will be inverted, having 1s where the
2566 // condition was true and 0s where the condition was false. Hence, the
2567 // arguments to SELB get reversed.
2569 // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2570 // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2571 // with another "cannot select select_cc" assert:
2573 SDValue compare = DAG.getNode(ISD::SETCC, dl,
2574 TLI.getSetCCResultType(Op.getValueType()),
2575 lhs, rhs, condition);
2576 return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2579 //! Custom lower ISD::TRUNCATE
2580 static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2582 // Type to truncate to
2583 EVT VT = Op.getValueType();
2584 MVT simpleVT = VT.getSimpleVT();
2585 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2586 VT, (128 / VT.getSizeInBits()));
2587 DebugLoc dl = Op.getDebugLoc();
2589 // Type to truncate from
2590 SDValue Op0 = Op.getOperand(0);
2591 EVT Op0VT = Op0.getValueType();
2593 if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2594 // Create shuffle mask, least significant doubleword of quadword
2595 unsigned maskHigh = 0x08090a0b;
2596 unsigned maskLow = 0x0c0d0e0f;
2597 // Use a shuffle to perform the truncation
2598 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2599 DAG.getConstant(maskHigh, MVT::i32),
2600 DAG.getConstant(maskLow, MVT::i32),
2601 DAG.getConstant(maskHigh, MVT::i32),
2602 DAG.getConstant(maskLow, MVT::i32));
2604 SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2605 Op0, Op0, shufMask);
2607 return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2610 return SDValue(); // Leave the truncate unmolested
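// Illustrative example: the i128 -> i64 mask above selects bytes 8..15 of
// the source quadword ({ 0x08090a0b, 0x0c0d0e0f } repeated), which on the
// big-endian SPU is the least significant doubleword; VEC2PREFSLOT then
// moves it into the scalar preferred slot.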
2614 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2615 * algorithm is to duplicate the sign bit using rotmai to generate at
2616 * least one byte full of sign bits. Then propagate the "sign-byte" into
2617 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2619 * @param Op The sext operand
2620 * @param DAG The current DAG
2621 * @return The SDValue with the entire instruction sequence
2623 static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2625 DebugLoc dl = Op.getDebugLoc();
2627 // Type to extend to
2628 MVT OpVT = Op.getValueType().getSimpleVT();
2629 EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2630 OpVT, (128 / OpVT.getSizeInBits()));
2632 // Type to extend from
2633 SDValue Op0 = Op.getOperand(0);
2634 MVT Op0VT = Op0.getValueType().getSimpleVT();
2636 // The type to extend to needs to be an i128 and
2637 // the type to extend from needs to be i64 or i32.
2638 assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2639 "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2641 // Create shuffle mask
2642 unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2643 unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte 8 - 11
2644 unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2645 SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2646 DAG.getConstant(mask1, MVT::i32),
2647 DAG.getConstant(mask1, MVT::i32),
2648 DAG.getConstant(mask2, MVT::i32),
2649 DAG.getConstant(mask3, MVT::i32));
2651 // Word wise arithmetic right shift to generate at least one byte
2652 // that contains sign bits.
2653 MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2654 SDValue sraVal = DAG.getNode(ISD::SRA,
2656 mvt,
2657 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2658 DAG.getConstant(31, MVT::i32));
2660 // Shuffle bytes - Copy the sign bits into the upper 64 bits
2661 // and the input value into the lower 64 bits.
2662 SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2663 DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2665 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
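// Illustrative example: for an i64 -> i128 sext the mask is
// { 0x10101010, 0x10101010, 0x00010203, 0x04050607 }. Controls 0x10 select
// byte 0 of sraVal (all sign bits) into the upper eight bytes, while
// 0x00..0x07 copy the original i64 from its preferred slot into the lower
// eight; e.g. sign-extending -2 should yield an i128 of all ones except the
// lowest bit.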
2668 //! Custom (target-specific) lowering entry point
2670 This is where LLVM's DAG selection process calls in to perform
2671 target-specific lowering of nodes.
2673 SDValue
2674 SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2676 unsigned Opc = (unsigned) Op.getOpcode();
2677 EVT VT = Op.getValueType();
2679 switch (Opc) {
2680 default: {
2681 #ifndef NDEBUG
2682 errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2683 errs() << "Op.getOpcode() = " << Opc << "\n";
2684 errs() << "*Op.getNode():\n";
2685 Op.getNode()->dump();
2686 #endif
2687 llvm_unreachable(0);
2689 case ISD::LOAD:
2690 case ISD::EXTLOAD:
2691 case ISD::SEXTLOAD:
2692 case ISD::ZEXTLOAD:
2693 return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2694 case ISD::STORE:
2695 return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2696 case ISD::ConstantPool:
2697 return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2698 case ISD::GlobalAddress:
2699 return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2700 case ISD::JumpTable:
2701 return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2702 case ISD::ConstantFP:
2703 return LowerConstantFP(Op, DAG);
2705 // i8, i64 math ops:
2706 case ISD::ADD:
2707 case ISD::SUB:
2708 case ISD::ROTR:
2709 case ISD::ROTL:
2710 case ISD::SRL:
2711 case ISD::SHL:
2712 case ISD::SRA: {
2713 if (VT == MVT::i8)
2714 return LowerI8Math(Op, DAG, Opc, *this);
2715 break;
2718 case ISD::FP_TO_SINT:
2719 case ISD::FP_TO_UINT:
2720 return LowerFP_TO_INT(Op, DAG, *this);
2722 case ISD::SINT_TO_FP:
2723 case ISD::UINT_TO_FP:
2724 return LowerINT_TO_FP(Op, DAG, *this);
2726 // Vector-related lowering.
2727 case ISD::BUILD_VECTOR:
2728 return LowerBUILD_VECTOR(Op, DAG);
2729 case ISD::SCALAR_TO_VECTOR:
2730 return LowerSCALAR_TO_VECTOR(Op, DAG);
2731 case ISD::VECTOR_SHUFFLE:
2732 return LowerVECTOR_SHUFFLE(Op, DAG);
2733 case ISD::EXTRACT_VECTOR_ELT:
2734 return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2735 case ISD::INSERT_VECTOR_ELT:
2736 return LowerINSERT_VECTOR_ELT(Op, DAG);
2738 // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2739 case ISD::AND:
2740 case ISD::OR:
2741 case ISD::XOR:
2742 return LowerByteImmed(Op, DAG);
2744 // Vector and i8 multiply:
2745 case ISD::MUL:
2746 if (VT == MVT::i8)
2747 return LowerI8Math(Op, DAG, Opc, *this);
2749 case ISD::CTPOP:
2750 return LowerCTPOP(Op, DAG);
2752 case ISD::SELECT_CC:
2753 return LowerSELECT_CC(Op, DAG, *this);
2755 case ISD::SETCC:
2756 return LowerSETCC(Op, DAG, *this);
2758 case ISD::TRUNCATE:
2759 return LowerTRUNCATE(Op, DAG);
2761 case ISD::SIGN_EXTEND:
2762 return LowerSIGN_EXTEND(Op, DAG);
2765 return SDValue();
2768 void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2769 SmallVectorImpl<SDValue>&Results,
2770 SelectionDAG &DAG)
2772 #if 0
2773 unsigned Opc = (unsigned) N->getOpcode();
2774 EVT OpVT = N->getValueType(0);
2776 switch (Opc) {
2777 default: {
2778 errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2779 errs() << "Op.getOpcode() = " << Opc << "\n";
2780 errs() << "*Op.getNode():\n";
2781 N->dump();
2782 abort();
2783 /*NOTREACHED*/
2786 #endif
2788 /* Otherwise, return unchanged */
2791 //===----------------------------------------------------------------------===//
2792 // Target Optimization Hooks
2793 //===----------------------------------------------------------------------===//
2795 SDValue
2796 SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2798 #if 0
2799 TargetMachine &TM = getTargetMachine();
2800 #endif
2801 const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2802 SelectionDAG &DAG = DCI.DAG;
2803 SDValue Op0 = N->getOperand(0); // everything has at least one operand
2804 EVT NodeVT = N->getValueType(0); // The node's value type
2805 EVT Op0VT = Op0.getValueType(); // The first operand's result
2806 SDValue Result; // Initially, empty result
2807 DebugLoc dl = N->getDebugLoc();
2809 switch (N->getOpcode()) {
2810 default: break;
2811 case ISD::ADD: {
2812 SDValue Op1 = N->getOperand(1);
2814 if (Op0.getOpcode() == SPUISD::IndirectAddr
2815 || Op1.getOpcode() == SPUISD::IndirectAddr) {
2816 // Normalize the operands to reduce repeated code
2817 SDValue IndirectArg = Op0, AddArg = Op1;
2819 if (Op1.getOpcode() == SPUISD::IndirectAddr) {
2820 IndirectArg = Op1;
2821 AddArg = Op0;
2824 if (isa<ConstantSDNode>(AddArg)) {
2825 ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
2826 SDValue IndOp1 = IndirectArg.getOperand(1);
2828 if (CN0->isNullValue()) {
2829 // (add (SPUindirect <arg>, <arg>), 0) ->
2830 // (SPUindirect <arg>, <arg>)
2832 #if !defined(NDEBUG)
2833 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2834 errs() << "\n"
2835 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
2836 << "With: (SPUindirect <arg>, <arg>)\n";
2838 #endif
2840 return IndirectArg;
2841 } else if (isa<ConstantSDNode>(IndOp1)) {
2842 // (add (SPUindirect <arg>, <const>), <const>) ->
2843 // (SPUindirect <arg>, <const + const>)
2844 ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
2845 int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
2846 SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);
2848 #if !defined(NDEBUG)
2849 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2850 errs() << "\n"
2851 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
2852 << "), " << CN0->getSExtValue() << ")\n"
2853 << "With: (SPUindirect <arg>, "
2854 << combinedConst << ")\n";
2856 #endif
2858 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2859 IndirectArg.getOperand(0), combinedValue);
2863 break;
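// Illustrative example of the combine above: using the base operand of the
// inner node, (add (SPUindirect $reg, 16), 8) folds to
// (SPUindirect $reg, 24), and adding 0 simply returns the SPUindirect node
// unchanged.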
2865 case ISD::SIGN_EXTEND:
2866 case ISD::ZERO_EXTEND:
2867 case ISD::ANY_EXTEND: {
2868 if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
2869 // (any_extend (SPUextract_elt0 <arg>)) ->
2870 // (SPUextract_elt0 <arg>)
2871 // Types must match, however...
2872 #if !defined(NDEBUG)
2873 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2874 errs() << "\nReplace: ";
2875 N->dump(&DAG);
2876 errs() << "\nWith: ";
2877 Op0.getNode()->dump(&DAG);
2878 errs() << "\n";
2880 #endif
2882 return Op0;
2884 break;
2886 case SPUISD::IndirectAddr: {
2887 if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
2888 ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
2889 if (CN != 0 && CN->getZExtValue() == 0) {
2890 // (SPUindirect (SPUaform <addr>, 0), 0) ->
2891 // (SPUaform <addr>, 0)
2893 DEBUG(errs() << "Replace: ");
2894 DEBUG(N->dump(&DAG));
2895 DEBUG(errs() << "\nWith: ");
2896 DEBUG(Op0.getNode()->dump(&DAG));
2897 DEBUG(errs() << "\n");
2899 return Op0;
2901 } else if (Op0.getOpcode() == ISD::ADD) {
2902 SDValue Op1 = N->getOperand(1);
2903 if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
2904 // (SPUindirect (add <arg>, <arg>), 0) ->
2905 // (SPUindirect <arg>, <arg>)
2906 if (CN1->isNullValue()) {
2908 #if !defined(NDEBUG)
2909 if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
2910 errs() << "\n"
2911 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
2912 << "With: (SPUindirect <arg>, <arg>)\n";
2914 #endif
2916 return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
2917 Op0.getOperand(0), Op0.getOperand(1));
2921 break;
2923 case SPUISD::SHLQUAD_L_BITS:
2924 case SPUISD::SHLQUAD_L_BYTES:
2925 case SPUISD::ROTBYTES_LEFT: {
2926 SDValue Op1 = N->getOperand(1);
2928 // Kill degenerate vector shifts:
2929 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
2930 if (CN->isNullValue()) {
2931 Result = Op0;
2934 break;
2936 case SPUISD::PREFSLOT2VEC: {
2937 switch (Op0.getOpcode()) {
2938 default:
2939 break;
2940 case ISD::ANY_EXTEND:
2941 case ISD::ZERO_EXTEND:
2942 case ISD::SIGN_EXTEND: {
2943 // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
2944 // <arg>
2945 // but only if the SPUprefslot2vec and <arg> types match.
2946 SDValue Op00 = Op0.getOperand(0);
2947 if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
2948 SDValue Op000 = Op00.getOperand(0);
2949 if (Op000.getValueType() == NodeVT) {
2950 Result = Op000;
2953 break;
2955 case SPUISD::VEC2PREFSLOT: {
2956 // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
2957 // <arg>
2958 Result = Op0.getOperand(0);
2959 break;
2962 break;
2966 // Otherwise, return unchanged.
2967 #ifndef NDEBUG
2968 if (Result.getNode()) {
2969 DEBUG(errs() << "\nReplace.SPU: ");
2970 DEBUG(N->dump(&DAG));
2971 DEBUG(errs() << "\nWith: ");
2972 DEBUG(Result.getNode()->dump(&DAG));
2973 DEBUG(errs() << "\n");
2975 #endif
2977 return Result;
2980 //===----------------------------------------------------------------------===//
2981 // Inline Assembly Support
2982 //===----------------------------------------------------------------------===//
2984 /// getConstraintType - Given a constraint letter, return the type of
2985 /// constraint it is for this target.
2986 SPUTargetLowering::ConstraintType
2987 SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2988 if (ConstraintLetter.size() == 1) {
2989 switch (ConstraintLetter[0]) {
2990 default: break;
2991 case 'b':
2992 case 'r':
2993 case 'f':
2994 case 'v':
2995 case 'y':
2996 return C_RegisterClass;
2999 return TargetLowering::getConstraintType(ConstraintLetter);
3002 std::pair<unsigned, const TargetRegisterClass*>
3003 SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3004 EVT VT) const
3006 if (Constraint.size() == 1) {
3007 // GCC RS6000 Constraint Letters
3008 switch (Constraint[0]) {
3009 case 'b': // R1-R31
3010 case 'r': // R0-R31
3011 if (VT == MVT::i64)
3012 return std::make_pair(0U, SPU::R64CRegisterClass);
3013 return std::make_pair(0U, SPU::R32CRegisterClass);
3014 case 'f':
3015 if (VT == MVT::f32)
3016 return std::make_pair(0U, SPU::R32FPRegisterClass);
3017 else if (VT == MVT::f64)
3018 return std::make_pair(0U, SPU::R64FPRegisterClass);
3019 break;
3020 case 'v':
3021 return std::make_pair(0U, SPU::GPRCRegisterClass);
3025 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3028 //! Compute used/known bits for a SPU operand
3029 void
3030 SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
3031 const APInt &Mask,
3032 APInt &KnownZero,
3033 APInt &KnownOne,
3034 const SelectionDAG &DAG,
3035 unsigned Depth ) const {
3036 #if 0
3037 const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;
3039 switch (Op.getOpcode()) {
3040 default:
3041 // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
3042 break;
3043 case CALL:
3044 case SHUFB:
3045 case SHUFFLE_MASK:
3046 case CNTB:
3047 case SPUISD::PREFSLOT2VEC:
3048 case SPUISD::LDRESULT:
3049 case SPUISD::VEC2PREFSLOT:
3050 case SPUISD::SHLQUAD_L_BITS:
3051 case SPUISD::SHLQUAD_L_BYTES:
3052 case SPUISD::VEC_ROTL:
3053 case SPUISD::VEC_ROTR:
3054 case SPUISD::ROTBYTES_LEFT:
3055 case SPUISD::SELECT_MASK:
3056 case SPUISD::SELB:
3058 #endif
3061 unsigned
3062 SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3063 unsigned Depth) const {
3064 switch (Op.getOpcode()) {
3065 default:
3066 return 1;
3068 case ISD::SETCC: {
3069 EVT VT = Op.getValueType();
3071 if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3072 VT = MVT::i32;
3074 return VT.getSizeInBits();
3079 // LowerAsmOperandForConstraint
3080 void
3081 SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
3082 char ConstraintLetter,
3083 bool hasMemory,
3084 std::vector<SDValue> &Ops,
3085 SelectionDAG &DAG) const {
3086 // Default, for the time being, to the base class handler
3087 TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
3088 Ops, DAG);
3091 /// isLegalAddressImmediate - Return true if the integer value can be used
3092 /// as the offset of the target addressing mode.
3093 bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3094 const Type *Ty) const {
3095 // SPU's addresses are 256K:
3096 return (V > -(1 << 18) && V < (1 << 18) - 1);
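// Illustrative note: 1 << 18 == 262144 == 256K, the size of the SPU local
// store, so immediate offsets strictly between -262144 and 262143 are
// accepted.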
3099 bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3100 return false;
3103 bool
3104 SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3105 // The SPU target isn't yet aware of offsets.
3106 return false;