//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }
  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}
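
// Sketch of a typical ExpandLibCall use (the RTLIB entry and operand here are
// illustrative assumptions, not calls taken from this file):
//
//   SDValue Dummy;
//   SDValue Ret = ExpandLibCall(RTLIB::FPTOSINT_F64_I64, Op, DAG,
//                               /*isSigned=*/true, Dummy, TLI);
//
// which builds a CallingConv::C call to the runtime routine that
// getLibcallName() maps the RTLIB entry to, and returns the call's result.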
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }
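
  // Net effect of the two loops above (illustrative): for a legal scalar type
  // such as MVT::i32, every narrower truncating store, e.g.
  // (truncstore i32 -> i16), is marked Expand, so the legalizer rewrites it
  // as a TRUNCATE of the value followed by an ordinary i16 store, which is
  // then custom lowered by LowerSTORE below.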
  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER,   MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN,  MVT::f64, Expand);
  setOperationAction(ISD::FCOS,  MVT::f64, Expand);
  setOperationAction(ISD::FREM,  MVT::f64, Expand);
  setOperationAction(ISD::FSIN,  MVT::f32, Expand);
  setOperationAction(ISD::FCOS,  MVT::f32, Expand);
  setOperationAction(ISD::FREM,  MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32 instead of a libcall)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        the .td files.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);
  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for i32 -> f32, but not for the i64 and f64 cases:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);
  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);
  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD, VT, Legal);
    setOperationAction(ISD::SUB, VT, Legal);
    // mul is legal for the supported vector types; scalar i8 multiplies are
    // custom lowered above.
    setOperationAction(ISD::MUL, VT, Legal);

    setOperationAction(ISD::AND,    VT, Legal);
    setOperationAction(ISD::OR,     VT, Legal);
    setOperationAction(ISD::XOR,    VT, Legal);
    setOperationAction(ISD::LOAD,   VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR,       VT, Custom);
    setOperationAction(ISD::ConstantPool,       VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR,   VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT,  VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE,     VT, Custom);
  }
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT
SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
          VT : MVT::i32);
}
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();
  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode> (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
->getAddressingMode()) {
671 case ISD::UNINDEXED
: {
672 // The vector type we really want to load from the 16-byte chunk.
673 MVT vecVT
= MVT::getVectorVT(VT
, (128 / VT
.getSizeInBits())),
674 stVecVT
= MVT::getVectorVT(StVT
, (128 / StVT
.getSizeInBits()));
676 SDValue alignLoadVec
;
677 SDValue basePtr
= SN
->getBasePtr();
678 SDValue the_chain
= SN
->getChain();
679 SDValue insertEltOffs
;
681 if (alignment
== 16) {
684 // Special cases for a known aligned load to simplify the base pointer
685 // and insertion byte:
686 if (basePtr
.getOpcode() == ISD::ADD
687 && (CN
= dyn_cast
<ConstantSDNode
>(basePtr
.getOperand(1))) != 0) {
688 // Known offset into basePtr
689 int64_t offset
= CN
->getSExtValue();
691 // Simplify the base pointer for this case:
692 basePtr
= basePtr
.getOperand(0);
693 insertEltOffs
= DAG
.getNode(SPUISD::IndirectAddr
, dl
, PtrVT
,
695 DAG
.getConstant((offset
& 0xf), PtrVT
));
697 if ((offset
& ~0xf) > 0) {
698 basePtr
= DAG
.getNode(SPUISD::IndirectAddr
, dl
, PtrVT
,
700 DAG
.getConstant((offset
& ~0xf), PtrVT
));
703 // Otherwise, assume it's at byte 0 of basePtr
704 insertEltOffs
= DAG
.getNode(SPUISD::IndirectAddr
, dl
, PtrVT
,
706 DAG
.getConstant(0, PtrVT
));
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }
    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#ifndef NDEBUG
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      cerr << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      cerr << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerSTORE: Got a StoreSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}
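
// Addressing note for the static-relocation lowerings here and below (jump
// tables and globals follow the same shape): in small-memory mode the whole
// address fits the 18-bit immediate of an a-form address
// (SPUISD::AFormAddr), while in large-memory mode it is split into
// SPUISD::Hi/SPUISD::Lo halves, which are typically matched as an ilhu/iohl
// pair feeding the indirect address.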
static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}
static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
            "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}
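
// Worked example (illustrative): the f64 constant 1.0 has bit pattern
// 0x3ff0000000000000, so the lowering above builds
//   (v2i64 build_vector 0x3ff0000000000000, 0x3ff0000000000000),
// bitconverts it to v2f64, and VEC2PREFSLOT extracts element 0 as the
// scalar f64 result.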
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }
  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||     // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;                // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
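
// Worked example (illustrative): Addr = 0x1230 passes both checks (the low
// two bits are clear, and (0x1230 << 14 >> 14) == 0x1230, so it fits the
// sign-extended 18-bit range); the returned immediate is 0x1230 >> 2 = 0x48c.
// Addr = 0x1232 fails the low-bit check and is rejected.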
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);
[3];
1234 unsigned NumResults
= 0;
1236 // If the call has results, copy the values out of the ret val registers.
1237 switch (TheCall
->getValueType(0).getSimpleVT()) {
1238 default: assert(0 && "Unexpected ret value!");
1239 case MVT::Other
: break;
1241 if (TheCall
->getValueType(1) == MVT::i32
) {
1242 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R4
,
1243 MVT::i32
, InFlag
).getValue(1);
1244 ResultVals
[0] = Chain
.getValue(0);
1245 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, MVT::i32
,
1246 Chain
.getValue(2)).getValue(1);
1247 ResultVals
[1] = Chain
.getValue(0);
1250 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, MVT::i32
,
1251 InFlag
).getValue(1);
1252 ResultVals
[0] = Chain
.getValue(0);
1257 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, MVT::i64
,
1258 InFlag
).getValue(1);
1259 ResultVals
[0] = Chain
.getValue(0);
1263 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, MVT::i128
,
1264 InFlag
).getValue(1);
1265 ResultVals
[0] = Chain
.getValue(0);
1270 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, TheCall
->getValueType(0),
1271 InFlag
).getValue(1);
1272 ResultVals
[0] = Chain
.getValue(0);
1281 Chain
= DAG
.getCopyFromReg(Chain
, dl
, SPU::R3
, TheCall
->getValueType(0),
1282 InFlag
).getValue(1);
1283 ResultVals
[0] = Chain
.getValue(0);
1288 // If the function returns void, just return the chain.
1289 if (NumResults
== 0)
1292 // Otherwise, merge everything together with a MERGE_VALUES node.
1293 ResultVals
[NumResults
++] = Chain
;
1294 SDValue Res
= DAG
.getMergeValues(ResultVals
, NumResults
, dl
);
1295 return Res
.getValue(Op
.getResNo());
static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}
//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}
/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                    /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}
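
// Worked example for the immediate-form helpers (illustrative): a v4i32 splat
// of 0x00050000 is rejected by get_vec_i10imm (out of the signed 10-bit
// range) but accepted by get_ILHUvec_imm, which returns 0x00050000 >> 16 = 5,
// the immediate for an "ilhu" (immediate load halfword upper) splat.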
//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();
  switch (VT.getSimpleVT()) {
  default: {
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
SDValue
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }
1627 for (int i
= 0; i
< 4; ++i
) {
1629 for (int j
= 0; j
< 4; ++j
) {
1631 bool process_upper
, process_lower
;
1633 process_upper
= (upper_special
&& (i
& 1) == 0);
1634 process_lower
= (lower_special
&& (i
& 1) == 1);
1636 if (process_upper
|| process_lower
) {
1637 if ((process_upper
&& upper
== 0)
1638 || (process_lower
&& lower
== 0))
1640 else if ((process_upper
&& upper
== 0xffffffff)
1641 || (process_lower
&& lower
== 0xffffffff))
1643 else if ((process_upper
&& upper
== 0x80000000)
1644 || (process_lower
&& lower
== 0x80000000))
1645 val
|= (j
== 0 ? 0xe0 : 0x80);
1647 val
|= i
* 4 + j
+ ((i
& 1) * 16);
1650 ShufBytes
.push_back(DAG
.getConstant(val
, MVT::i32
));
1653 return DAG
.getNode(SPUISD::SHUFB
, dl
, OpVT
, HI32
, LO32
,
1654 DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v4i32
,
1655 &ShufBytes
[0], ShufBytes
.size()));
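
// The control bytes built above follow the SPU shufb special encodings: a
// mask byte in 0x80..0xbf yields 0x00 in the result, 0xc0..0xdf yields 0xff,
// 0xe0..0xff yields 0x80, and bytes below 0x80 select from the concatenated
// inputs. Illustrative case: splatting 0x0000000012345678 makes the upper
// word "special" (zero), so mask words 0 and 2 become 0x80808080 while words
// 1 and 3 select the 0x12345678 word from the LO32 splat.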
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions,
/// generating a control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1680 // If we have a single element being moved from V1 to V2, this can be handled
1681 // using the C*[DX] compute mask instructions, but the vector elements have
1682 // to be monotonically increasing with one exception element.
1683 MVT VecVT
= V1
.getValueType();
1684 MVT EltVT
= VecVT
.getVectorElementType();
1685 unsigned EltsFromV2
= 0;
1687 unsigned V2EltIdx0
= 0;
1688 unsigned CurrElt
= 0;
1689 unsigned MaxElts
= VecVT
.getVectorNumElements();
1690 unsigned PrevElt
= 0;
1692 bool monotonic
= true;
1695 if (EltVT
== MVT::i8
) {
1697 } else if (EltVT
== MVT::i16
) {
1699 } else if (EltVT
== MVT::i32
|| EltVT
== MVT::f32
) {
1701 } else if (EltVT
== MVT::i64
|| EltVT
== MVT::f64
) {
1704 assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1706 for (unsigned i
= 0; i
!= MaxElts
; ++i
) {
1707 if (SVN
->getMaskElt(i
) < 0)
1710 unsigned SrcElt
= SVN
->getMaskElt(i
);
1713 if (SrcElt
>= V2EltIdx0
) {
1714 if (1 >= (++EltsFromV2
)) {
1715 V2Elt
= (V2EltIdx0
- SrcElt
) << 2;
1717 } else if (CurrElt
!= SrcElt
) {
1725 if (PrevElt
> 0 && SrcElt
< MaxElts
) {
1726 if ((PrevElt
== SrcElt
- 1)
1727 || (PrevElt
== MaxElts
- 1 && SrcElt
== 0)) {
1734 } else if (PrevElt
== 0) {
1735 // First time through, need to keep track of previous element
1738 // This isn't a rotation, takes elements from vector 2
1744 if (EltsFromV2
== 1 && monotonic
) {
1745 // Compute mask and shuffle
1746 MachineFunction
&MF
= DAG
.getMachineFunction();
1747 MachineRegisterInfo
&RegInfo
= MF
.getRegInfo();
1748 unsigned VReg
= RegInfo
.createVirtualRegister(&SPU::R32CRegClass
);
1749 MVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy();
1750 // Initialize temporary register to 0
1751 SDValue InitTempReg
=
1752 DAG
.getCopyToReg(DAG
.getEntryNode(), dl
, VReg
, DAG
.getConstant(0, PtrVT
));
1753 // Copy register's contents as index in SHUFFLE_MASK:
1754 SDValue ShufMaskOp
=
1755 DAG
.getNode(SPUISD::SHUFFLE_MASK
, dl
, MVT::v4i32
,
1756 DAG
.getTargetConstant(V2Elt
, MVT::i32
),
1757 DAG
.getCopyFromReg(InitTempReg
, dl
, VReg
, PtrVT
));
1758 // Use shuffle mask in SHUFB synthetic instruction:
1759 return DAG
.getNode(SPUISD::SHUFB
, dl
, V1
.getValueType(), V2
, V1
,
1761 } else if (rotate
) {
1762 int rotamt
= (MaxElts
- V0Elt
) * EltVT
.getSizeInBits()/8;
1764 return DAG
.getNode(SPUISD::ROTBYTES_LEFT
, dl
, V1
.getValueType(),
1765 V1
, DAG
.getConstant(rotamt
, MVT::i16
));
1767 // Convert the SHUFFLE_VECTOR mask's input element units to the
1769 unsigned BytesPerElement
= EltVT
.getSizeInBits()/8;
1771 SmallVector
<SDValue
, 16> ResultMask
;
1772 for (unsigned i
= 0, e
= MaxElts
; i
!= e
; ++i
) {
1773 unsigned SrcElt
= SVN
->getMaskElt(i
) < 0 ? 0 : SVN
->getMaskElt(i
);
1775 for (unsigned j
= 0; j
< BytesPerElement
; ++j
)
1776 ResultMask
.push_back(DAG
.getConstant(SrcElt
*BytesPerElement
+j
,MVT::i8
));
1779 SDValue VPermMask
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v16i8
,
1780 &ResultMask
[0], ResultMask
.size());
1781 return DAG
.getNode(SPUISD::SHUFB
, dl
, V1
.getValueType(), V1
, V2
, VPermMask
);
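// Two illustrative mask classifications for the code above (a sketch, not
// part of the lowering itself), assuming a v4i32 shuffle:
//
//   mask (0, 1, 4, 3): elements are monotonically increasing with exactly
//   one element (index 4, i.e. element 0 of V2) taken from V2, so
//   EltsFromV2 == 1 and the SHUFFLE_MASK/SHUFB ("compute mask") path is
//   taken.
//
//   mask (1, 2, 3, 0): each element follows its predecessor, wrapping from
//   the last element back to 0, so rotate stays true with V0Elt == 3. The
//   rotation amount is (4 - 3) * 4 = 4 bytes, and ROTBYTES_LEFT by 4 turns
//   (e0, e1, e2, e3) into (e1, e2, e3, e0) as required.
//
// Anything else falls through to the general v16i8 byte-permutation mask.
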
static SDValue
LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                   // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8;  VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4;  VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4;  VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2;  VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2;  VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
    }
  }

  return SDValue();
}

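// Illustration (not part of the lowering itself): SCALAR_TO_VECTOR only
// defines element 0 of the result, so the constant path above is free to
// splat, e.g. (scalar_to_vector (i32 42)) becomes a v4i32 BUILD_VECTOR of
// four 42s, which is cheap to materialize. The non-constant path relies on
// the SPU's unified register file: the scalar already lives in a 128-bit
// register, so PREFSLOT2VEC merely reinterprets it with the scalar sitting
// in the preferred slot.
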
static SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}

static SDValue
LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}

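// Illustration (not part of the lowering itself): the SHUFFLE_MASK node here
// is what eventually selects to one of the C*D "generate controls for
// insertion" instructions. Those derive the control word from the low bits
// of an address, which is why the index is folded into an $sp-relative
// address above: $sp is 16-byte aligned, so only the index contributes to
// the low nibble, and the stack pointer itself is never dereferenced. The
// resulting control word routes the new element from the first SHUFB
// operand into the target slot while passing the rest of VecOp through.
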
static SDValue
LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
            const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}

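// Why the rotate cases above replicate the byte (illustrative sketch only):
// with the low byte copied into the high byte, a 16-bit rotate produces the
// 8-bit rotate in its low byte. For example, with N0 = 0xab, ExpandArg is
// 0xabab; a 16-bit ROTR by 4 yields 0xbaba, whose low byte 0xba is exactly
// 0xab rotated right by 4. The final TRUNCATE discards the upper half.
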
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}

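// Illustration (not part of the lowering itself): the effect of the splat
// detection above is that, e.g., (and v16i8 x, splat(0x0f)) is rebuilt with
// an explicit 16 x i8 splat of the low immediate byte, which the instruction
// patterns can then match directly as ANDBI (and likewise ORBI/XORBI),
// instead of materializing the constant vector separately.
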
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}

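// A scalar model of the i32 accumulation above (illustrative sketch only,
// assuming c holds CNTB's four per-byte counts [c3 c2 c1 c0]):
//
//   uint32_t sum1 = c + (c >> 16);       // byte 0: c0+c2, byte 1: c1+c3
//   uint32_t sum2 = sum1 + (sum1 >> 8);  // byte 0: c0+c1+c2+c3
//   uint32_t popcount = sum2 & 0xff;
//
// Each per-byte count is at most 8, so the partial sums (at most 32) never
// carry out of their byte and the final AND extracts the complete count.
// The i16 case is the same idea with a single fold of the two byte counts.
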
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
 All conversions to i64 are expanded to a libcall.
 */
static SDValue
LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
               SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
 All conversions from i64 are expanded to a libcall.
 */
static SDValue
LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
               SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}

//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering
 */
static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
                          const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  MVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
  MVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::SRL, dl, IntVT,
                                  i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
          DAG.getNode(ISD::AND, dl, MVT::i32,
                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                      DAG.getNode(ISD::SRL, dl, IntVT,
                                  i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
          DAG.getNode(ISD::SELECT, dl, IntVT,
                      lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
          DAG.getNode(ISD::SELECT, dl, IntVT,
                      rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
    abort();
    break;
  }

  SDValue result =
          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                       (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}

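// Illustration of the sign-magnitude trick above (not part of the lowering
// itself): IEEE-754 doubles order like sign-magnitude integers, so
// non-negative values already compare correctly as raw bits, while negative
// values are remapped via 0x8000000000000000 - bits to obtain a two's
// complement image with the right ordering. As a spot check, -0.0
// (bits 0x8000000000000000) maps to 0, the same image as +0.0, so the
// integer SETEQ correctly reports -0.0 == +0.0.
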
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
static SDValue
LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
               const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}

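// Illustration (not part of the lowering itself): SELB computes
// (rA & ~mask) | (rB & mask), picking rB's bits wherever the mask is 1.
// Since the SETCC above produces an all-ones mask when the condition holds,
// passing (falseval, trueval, compare) selects trueval exactly when the
// condition is true, which is why the operands read "reversed" relative to
// ISD::SELECT.
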
//! Custom lower ISD::TRUNCATE
static SDValue
LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();                   // Leave the truncate unmolested
}

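// A worked view of the i128 -> i64 truncation above (illustrative only): on
// the big-endian SPU the low doubleword of a quadword occupies bytes 8..15,
// so the mask words 0x08090a0b and 0x0c0d0e0f route exactly those bytes
// into bytes 0..7 of the result, i.e. into the preferred slot that
// VEC2PREFSLOT reads. Repeating the pair fills the rest of the vector with
// the same doubleword.
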
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
  case ISD::EXTLOAD:
  case ISD::SEXTLOAD:
  case ISD::ZEXTLOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}

void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode> (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode> (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
    break;
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}