//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
14 #include "SPURegisterNames.h"
15 #include "SPUISelLowering.h"
16 #include "SPUTargetMachine.h"
17 #include "SPUFrameInfo.h"
18 #include "llvm/ADT/APInt.h"
19 #include "llvm/ADT/VectorExtras.h"
20 #include "llvm/CallingConv.h"
21 #include "llvm/CodeGen/CallingConvLower.h"
22 #include "llvm/CodeGen/MachineFrameInfo.h"
23 #include "llvm/CodeGen/MachineFunction.h"
24 #include "llvm/CodeGen/MachineInstrBuilder.h"
25 #include "llvm/CodeGen/MachineRegisterInfo.h"
26 #include "llvm/CodeGen/SelectionDAG.h"
27 #include "llvm/Constants.h"
28 #include "llvm/Function.h"
29 #include "llvm/Intrinsics.h"
30 #include "llvm/Support/Debug.h"
31 #include "llvm/Support/MathExtras.h"
32 #include "llvm/Target/TargetOptions.h"
using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT   valtype;
    const int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
  static SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT();
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy = Op.getNode()->getValueType(0).getTypeForMVT();
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}
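
// Illustrative usage sketch (hypothetical, not a call site in this file): a
// custom lowering hook could expand an unsupported f64 conversion through
// ExpandLibCall, e.g.
//
//   SDValue Dummy;
//   SDValue Lowered = ExpandLibCall(RTLIB::FPTOSINT_F64_I32, Op, DAG,
//                                   /*isSigned=*/true, Dummy, TLI);
//
// where RTLIB::FPTOSINT_F64_I32 names the standard __fixdfsi runtime routine
// and the Hi out-parameter is passed a dummy because it is unused here.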

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,  VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC, MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        custom DAG combines.
  setOperationAction(ISD::ROTR, MVT::i32, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16, Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,  Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32, Legal);
  setOperationAction(ISD::ROTL, MVT::i16, Legal);
  setOperationAction(ISD::ROTL, MVT::i8,  Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL, MVT::i8, Custom);
  setOperationAction(ISD::SRL, MVT::i8, Custom);
  setOperationAction(ISD::SRA, MVT::i8, Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL, MVT::i64, Legal);
  setOperationAction(ISD::SRL, MVT::i64, Legal);
  setOperationAction(ISD::SRA, MVT::i64, Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL, MVT::i8,  Custom);
  setOperationAction(ISD::MUL, MVT::i32, Legal);
  setOperationAction(ISD::MUL, MVT::i64, Legal);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD, MVT::i8,  Custom);
  setOperationAction(ISD::ADD, MVT::i64, Legal);
  setOperationAction(ISD::SUB, MVT::i8,  Custom);
  setOperationAction(ISD::SUB, MVT::i64, Legal);

  // SPU does not have BSWAP. It does have CTLZ support for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,  Custom);
  setOperationAction(ISD::CTPOP, MVT::i16, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i64, Custom);

  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  setOperationAction(ISD::CTLZ, MVT::i32, Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,  Legal);
  setOperationAction(ISD::SELECT, MVT::i16, Legal);
  setOperationAction(ISD::SELECT, MVT::i32, Legal);
  setOperationAction(ISD::SELECT, MVT::i64, Legal);
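
  // Illustration (an editorial example, not functional code): the SELB
  // identity means select(a, b, c) == (a & ~c) | (b & c) bit-for-bit; e.g.,
  // with a = 0x00ff, b = 0xff00, c = 0x0f0f the result is
  // (0x00ff & 0xf0f0) | (0xff00 & 0x0f0f) = 0x00f0 | 0x0f00 = 0x0ff0.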

  setOperationAction(ISD::SETCC, MVT::i8,  Legal);
  setOperationAction(ISD::SETCC, MVT::i16, Legal);
  setOperationAction(ISD::SETCC, MVT::i32, Legal);
  setOperationAction(ISD::SETCC, MVT::i64, Legal);
  setOperationAction(ISD::SETCC, MVT::f64, Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand); // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC,     MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32,   Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64,   Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
364 // "Odd size" vector classes that we're willing to support:
365 addRegisterClass(MVT::v2i32
, SPU::VECREGRegisterClass
);
367 for (unsigned i
= (unsigned)MVT::FIRST_VECTOR_VALUETYPE
;
368 i
<= (unsigned)MVT::LAST_VECTOR_VALUETYPE
; ++i
) {
369 MVT VT
= (MVT::SimpleValueType
)i
;
371 // add/sub are legal for all supported vector VT's.
372 setOperationAction(ISD::ADD
, VT
, Legal
);
373 setOperationAction(ISD::SUB
, VT
, Legal
);
374 // mul has to be custom lowered.
375 setOperationAction(ISD::MUL
, VT
, Legal
);
377 setOperationAction(ISD::AND
, VT
, Legal
);
378 setOperationAction(ISD::OR
, VT
, Legal
);
379 setOperationAction(ISD::XOR
, VT
, Legal
);
380 setOperationAction(ISD::LOAD
, VT
, Legal
);
381 setOperationAction(ISD::SELECT
, VT
, Legal
);
382 setOperationAction(ISD::STORE
, VT
, Legal
);
384 // These operations need to be expanded:
385 setOperationAction(ISD::SDIV
, VT
, Expand
);
386 setOperationAction(ISD::SREM
, VT
, Expand
);
387 setOperationAction(ISD::UDIV
, VT
, Expand
);
388 setOperationAction(ISD::UREM
, VT
, Expand
);
390 // Custom lower build_vector, constant pool spills, insert and
391 // extract vector elements:
392 setOperationAction(ISD::BUILD_VECTOR
, VT
, Custom
);
393 setOperationAction(ISD::ConstantPool
, VT
, Custom
);
394 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Custom
);
395 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Custom
);
396 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Custom
);
397 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Custom
);

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT
SPUTargetLowering::getSetCCResultType(MVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
          VT : MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32 = vec2prefslot %3
%5  f64 = fp_extend %4
\endverbatim
*/
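
/*
 Worked example (an editorial illustration, assuming the preferred-slot table
 near the top of this file): an aligned i32 load from byte offset 8 of a
 quadword computes rotamt = (8 & 0xf) - prefslot_byte(i32) = 8 - 0 = 8, so the
 16-byte block is rotated left by 8 bytes and the requested word lands in
 bytes 0-3, its preferred slot.
*/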
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();
  MVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
*/
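
/*
 Illustrative DAG sequence (an editorial example, not emitted verbatim):
 storing an i32 into byte offset 4 of an aligned quadword becomes roughly

\verbatim
%0 v16i8,ch = load <16-byte block>
%1 v4i32    = shuffle_mask <insertion offset>
%2 v4i32    = scalar_to_vector %value
%3 v16i8    = shufb %2, %0, %1      ; splice the value into the block
%4 ch       = store %3              ; write the whole block back
\endverbatim
*/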
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned store: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      cerr << "CellSPU LowerSTORE: basePtr = ";
      basePtr.getNode()->dump(&DAG);
      cerr << "\n";
    }
#endif

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());
789 #if 0 && !defined(NDEBUG)
790 if (DebugFlag
&& isCurrentDebugType(DEBUG_TYPE
)) {
791 const SDValue
¤tRoot
= DAG
.getRoot();
794 cerr
<< "------- CellSPU:LowerStore result:\n";
797 DAG
.setRoot(currentRoot
);
808 case ISD::LAST_INDEXED_MODE
:
809 cerr
<< "LowerLOAD: Got a LoadSDNode with an addr mode other than "
811 cerr
<< (unsigned) SN
->getAddressingMode() << "\n";

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static"
         " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}
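
// Worked example (illustrative): Addr = 0x1fffc passes both tests -- its low
// two bits are clear and (0x1fffc << 14) >> 14 == 0x1fffc, since bits 31..18
// are a sign-extension of bit 17 -- so the returned immediate is
// 0x1fffc >> 2 = 0x7fff. Addr = 0x20000 fails: the shift pair sign-extends
// it to a negative value, so it is not a representable LSA address.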

static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area. According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }
1145 NumStackBytes
= (ArgOffset
- SPUFrameInfo::minStackSize());
1146 Chain
= DAG
.getCALLSEQ_START(Chain
, DAG
.getIntPtrConstant(NumStackBytes
,
1149 if (!MemOpChains
.empty()) {
1150 // Adjust the stack pointer for the stack arguments.
1151 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
1152 &MemOpChains
[0], MemOpChains
.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address.
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
                                                 Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}

static SDValue
LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);
  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.getNode() != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0;
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
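
/// Worked example (illustrative): a v4i32 splat of 0x0002ffff satisfies
/// Value <= 0x3ffff and is returned as an 18-bit unsigned immediate, whereas
/// a splat of 0x00040000 is out of range and yields an empty SDValue.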

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getTargetConstant(Value, ValueType);
    }
  }

  return SDValue();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant.
SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                            MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSExtValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getZExtValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);

      if (upper != lower)
        return SDValue();

      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                           MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
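
/// Worked example (illustrative): for ValueType == MVT::i16, a splat of
/// 0x5a5a has equal upper and lower bytes, so 0x5a is returned as the i8
/// immediate; 0x12ff fails the byte-equality test and yields an empty
/// SDValue.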

/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant.
SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getZExtValue();
    if ((ValueType == MVT::i32
         && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
      return DAG.getTargetConstant(Value >> 16, ValueType);
  }

  return SDValue();
}
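
/// Worked example (illustrative): an i32 splat of 0x00050000 has only upper
/// halfword bits set, so 0x00050000 >> 16 = 5 is returned as the ILHU
/// immediate; 0x00050001 has low bits set and yields an empty SDValue.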

/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
  }

  return SDValue();
}

/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
  }

  return SDValue();
}

//! Lower a BUILD_VECTOR instruction creatively:
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT()) {
  default: {
    cerr << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
         << VT.getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/
  }
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
  }
  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16-bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
  }
  case MVT::v2i64: {
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}

//! Lower a v2i64 constant splat, either directly or via a byte shuffle:
SDValue
SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
                     DebugLoc dl) {
  uint32_t upper = uint32_t(SplatVal >> 32);
  uint32_t lower = uint32_t(SplatVal);

  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et. al.
    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   Val, Val, Val, Val));
  } else {
    bool upper_special, lower_special;

    // NOTE: This code creates common-case shuffle masks that can be easily
    // detected as common expressions. It is not attempting to create highly
    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
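
    // For reference (SPU shufb semantics, an editorial note): a control byte
    // of the form 10xxxxxx selects the constant 0x00, 110xxxxx selects 0xff,
    // and 111xxxxx selects 0x80 -- which is why 0x80, 0xc0 and 0xe0 appear as
    // the "special" mask byte values in the loop below.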

    // Detect if the upper or lower half is a special shuffle mask pattern:
    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);

    // Both upper and lower are special, lower to a constant pool load:
    if (lower_special && upper_special) {
      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
                         SplatValCN, SplatValCN);
    }

    SDValue LO32;
    SDValue HI32;
    SmallVector<SDValue, 16> ShufBytes;

    // Create lower vector if not a special pattern
    if (!lower_special) {
      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     LO32C, LO32C, LO32C, LO32C));
    }

    // Create upper vector if not a special pattern
    if (!upper_special) {
      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                     HI32C, HI32C, HI32C, HI32C));
    }

    // If either upper or lower are special, then the two input operands are
    // the same (basically, one of them is a "don't care")
    if (lower_special)
      LO32 = HI32;
    if (upper_special)
      HI32 = LO32;

    for (int i = 0; i < 4; ++i) {
      uint64_t val = 0;
      for (int j = 0; j < 4; ++j) {
        bool process_upper, process_lower;
        val <<= 8;
        process_upper = (upper_special && (i & 1) == 0);
        process_lower = (lower_special && (i & 1) == 1);

        if (process_upper || process_lower) {
          if ((process_upper && upper == 0)
              || (process_lower && lower == 0))
            val |= 0x80;
          else if ((process_upper && upper == 0xffffffff)
                   || (process_lower && lower == 0xffffffff))
            val |= 0xc0;
          else if ((process_upper && upper == 0x80000000)
                   || (process_lower && lower == 0x80000000))
            val |= (j == 0 ? 0xe0 : 0x80);
        } else
          val |= i * 4 + j + ((i & 1) * 16);
      }

      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
    }

    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   &ShufBytes[0], ShufBytes.size()));
  }
}

/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant
/// pool. In either case, the net result is going to eventually invoke SHUFB
/// to permute/shuffle the bytes from V1 and V2.
///
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
///
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
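
/// Illustrative example (an editorial addition): for v4i32, the mask
/// (0, 1, 4, 3) is monotonic with a single element drawn from V2 (index 4 is
/// V2's element 0), so it lowers to a SHUFFLE_MASK/SHUFB pair, while the mask
/// (1, 2, 3, 0) is a pure rotation and lowers to ROTBYTES_LEFT instead.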
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();

  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;
  unsigned V2Elt = 0;
  unsigned V2EltIdx0 = 0;
  unsigned CurrElt = 0;
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;
  unsigned V0Elt = 0;
  bool monotonic = true;
  bool rotate = true;

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1706 for (unsigned i
= 0; i
!= PermMask
.getNumOperands(); ++i
) {
1707 if (PermMask
.getOperand(i
).getOpcode() != ISD::UNDEF
) {
1708 unsigned SrcElt
= cast
<ConstantSDNode
> (PermMask
.getOperand(i
))->getZExtValue();
1711 if (SrcElt
>= V2EltIdx0
) {
1712 if (1 >= (++EltsFromV2
)) {
1713 V2Elt
= (V2EltIdx0
- SrcElt
) << 2;
1715 } else if (CurrElt
!= SrcElt
) {
1723 if (PrevElt
> 0 && SrcElt
< MaxElts
) {
1724 if ((PrevElt
== SrcElt
- 1)
1725 || (PrevElt
== MaxElts
- 1 && SrcElt
== 0)) {
1732 } else if (PrevElt
== 0) {
1733 // First time through, need to keep track of previous element
1736 // This isn't a rotation, takes elements from vector 2
1743 if (EltsFromV2
== 1 && monotonic
) {
1744 // Compute mask and shuffle
1745 MachineFunction
&MF
= DAG
.getMachineFunction();
1746 MachineRegisterInfo
&RegInfo
= MF
.getRegInfo();
1747 unsigned VReg
= RegInfo
.createVirtualRegister(&SPU::R32CRegClass
);
1748 MVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy();
1749 // Initialize temporary register to 0
1750 SDValue InitTempReg
=
1751 DAG
.getCopyToReg(DAG
.getEntryNode(), dl
, VReg
, DAG
.getConstant(0, PtrVT
));
1752 // Copy register's contents as index in SHUFFLE_MASK:
1753 SDValue ShufMaskOp
=
1754 DAG
.getNode(SPUISD::SHUFFLE_MASK
, dl
, MVT::v4i32
,
1755 DAG
.getTargetConstant(V2Elt
, MVT::i32
),
1756 DAG
.getCopyFromReg(InitTempReg
, dl
, VReg
, PtrVT
));
1757 // Use shuffle mask in SHUFB synthetic instruction:
1758 return DAG
.getNode(SPUISD::SHUFB
, dl
, V1
.getValueType(), V2
, V1
,
1760 } else if (rotate
) {
1761 int rotamt
= (MaxElts
- V0Elt
) * EltVT
.getSizeInBits()/8;
1763 return DAG
.getNode(SPUISD::ROTBYTES_LEFT
, dl
, V1
.getValueType(),
1764 V1
, DAG
.getConstant(rotamt
, MVT::i16
));
1766 // Convert the SHUFFLE_VECTOR mask's input element units to the
1768 unsigned BytesPerElement
= EltVT
.getSizeInBits()/8;
1770 SmallVector
<SDValue
, 16> ResultMask
;
1771 for (unsigned i
= 0, e
= PermMask
.getNumOperands(); i
!= e
; ++i
) {
1773 if (PermMask
.getOperand(i
).getOpcode() == ISD::UNDEF
)
1776 SrcElt
= cast
<ConstantSDNode
>(PermMask
.getOperand(i
))->getZExtValue();
1778 for (unsigned j
= 0; j
< BytesPerElement
; ++j
) {
1779 ResultMask
.push_back(DAG
.getConstant(SrcElt
*BytesPerElement
+j
,
1784 SDValue VPermMask
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v16i8
,
1785 &ResultMask
[0], ResultMask
.size());
1786 return DAG
.getNode(SPUISD::SHUFB
, dl
, V1
.getValueType(), V1
, V2
, VPermMask
);
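// Two illustrative v4i32 masks (examples only): (0, 1, 4, 3) is monotonic
// with exactly one element taken from V2 (V2's element 0 lands in slot 2),
// so it goes down the SHUFFLE_MASK / C*D insertion path; (1, 2, 3, 0) is a
// pure rotation with V0Elt == 3, so rotamt == (4 - 3) * 4 == 4 bytes and it
// lowers to a single ROTBYTES_LEFT. Anything else falls through to the
// general 16-byte SHUFB permute built from the constant mask.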
static SDValue
LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
  DebugLoc dl = Op.getDebugLoc();

  if (Op0.getNode()->getOpcode() == ISD::Constant) {
    // For a constant, build the appropriate constant vector, which will
    // eventually simplify to a vector register load.

    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
    SmallVector<SDValue, 16> ConstVecValues;
    MVT VT;
    size_t n_copies;

    // Create a constant vector:
    switch (Op.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected constant value type in "
                         "LowerSCALAR_TO_VECTOR");
    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
    case MVT::v8i16: n_copies = 8;  VT = MVT::i16; break;
    case MVT::v4i32: n_copies = 4;  VT = MVT::i32; break;
    case MVT::v4f32: n_copies = 4;  VT = MVT::f32; break;
    case MVT::v2i64: n_copies = 2;  VT = MVT::i64; break;
    case MVT::v2f64: n_copies = 2;  VT = MVT::f64; break;
    }

    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
    for (size_t j = 0; j < n_copies; ++j)
      ConstVecValues.push_back(CValue);

    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
                       &ConstVecValues[0], ConstVecValues.size());
  } else {
    // Otherwise, copy the value from one register to another:
    switch (Op0.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::f32:
    case MVT::f64:
      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(),
                         Op0, Op0);
    }
  }

  return SDValue();
}
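// Design note: SCALAR_TO_VECTOR only defines element 0 of its result and
// leaves the remaining lanes undefined, so splatting a constant into every
// lane (as above) is a legal refinement. For example, an i32 constant 42
// becomes the v4i32 vector (42, 42, 42, 42), which the uniform BUILD_VECTOR
// lets instruction selection fold into a single immediate-load pattern.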
static SDValue
LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8:
      prefslot_begin = prefslot_end = 3;
      break;
    case MVT::i16:
      prefslot_begin = 2; prefslot_end = 3;
      break;
    case MVT::i32:
    case MVT::f32:
      prefslot_begin = 0; prefslot_end = 3;
      break;
    case MVT::i64:
    case MVT::f64:
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then
    // replicate slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
      abort();
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
      APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      cerr << "LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector type\n";
      abort();
      /*NOTREACHED*/
    case MVT::i8: {
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
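// Background for the two paths above: SPU keeps scalars in a 128-bit
// register's "preferred slot" (bytes 0-3 for i32/f32, byte 3 for i8, bytes
// 2-3 for i16, bytes 0-7 for i64/f64, matching prefslot_begin/end). For a
// variable index, SHLQUAD_L_BYTES first shifts the requested element down to
// byte 0; the replicate masks then broadcast it: 0x00010001 repeats bytes
// {0,1} in every halfword, 0x00010203 repeats bytes {0..3} in every word,
// and the 0x00010203/0x04050607 pair repeats bytes {0..7} in both
// doublewords, before VEC2PREFSLOT reads the scalar back out.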
static SDValue
LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  SDValue VecOp = Op.getOperand(0);
  SDValue ValOp = Op.getOperand(1);
  SDValue IdxOp = Op.getOperand(2);
  DebugLoc dl = Op.getDebugLoc();
  MVT VT = Op.getValueType();

  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Use $sp ($1) because it's always 16-byte aligned and it's available:
  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(CN->getSExtValue(), PtrVT));
  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);

  SDValue result =
    DAG.getNode(SPUISD::SHUFB, dl, VT,
                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
                VecOp,
                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));

  return result;
}
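// Why $sp works here: SHUFFLE_MASK is selected to one of the CBD/CHD/CWD/CDD
// "generate controls for insertion" instructions, which derive the mask only
// from the low four bits of the effective address. Since $sp is guaranteed
// 16-byte aligned, (SPUindirect $sp, idx) makes those low bits equal to the
// constant element offset, independent of the stack pointer's actual value.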
static SDValue
LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
            const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    MVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  }

  return SDValue();
}
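// The promotion pattern, spelled out for the ADD case (the other operators
// follow the same shape):
//
//   (add i8 %a, %b)
//     -> (truncate i8
//          (add i16 (sign_extend i16 %a), (sign_extend i16 %b)))
//
// SPU has no 8-bit ALU, but i16 arithmetic exists, and truncating the
// widened result is exact for wrap-around i8 arithmetic. The rotate cases
// additionally replicate the low byte into the high byte first, so bits
// rotated out of the 8-bit field re-enter from the other side.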
//! Lower byte immediate operations for v16i8 vectors:
static SDValue
LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
  SDValue ConstVec;
  SDValue Arg;
  MVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();

  ConstVec = Op.getOperand(0);
  Arg = Op.getOperand(1);
  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
      ConstVec = ConstVec.getOperand(0);
    } else {
      ConstVec = Op.getOperand(1);
      Arg = Op.getOperand(0);
      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
        ConstVec = ConstVec.getOperand(0);
      }
    }
  }

  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");

    APInt APSplatBits, APSplatUndef;
    unsigned SplatBitSize;
    bool HasAnyUndefs;
    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();

    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, minSplatBits)
        && minSplatBits <= SplatBitSize) {
      uint64_t SplatBits = APSplatBits.getZExtValue();
      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);

      SmallVector<SDValue, 16> tcVec;
      tcVec.assign(16, tc);
      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                     &tcVec[0], tcVec.size()));
    }
  }

  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
  // lowered. Return the operation, rather than a null SDValue.
  return Op;
}
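// The payoff: SPU's ANDBI/ORBI/XORBI forms take a single 8-bit immediate
// that the hardware applies to all 16 bytes. Rewriting a splatted constant
// operand as a uniform BUILD_VECTOR of target constants (tcVec above) is
// what lets instruction selection match those immediate forms instead of
// materializing the constant vector in a register.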
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number of ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue
LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);

    return DAG.getNode(ISD::AND, dl, MVT::i16,
                       DAG.getNode(ISD::ADD, dl, MVT::i16,
                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);

    SDValue Comp1 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
                  Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);

    SDValue Comp2 =
      DAG.getNode(ISD::SRL, dl, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    break;
  }

  return SDValue();
}
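// Worked example for the i32 path: CNTB counts ones per byte, so for the
// word 0xff0f0301 it yields byte counts {8, 4, 2, 1}. The first shift/add
// pair (srl 16, add) folds bytes {0,1} onto bytes {2,3}; the second
// (srl 8, add) folds the two remaining byte sums together; the final
// AND 0xff extracts the total, 15. No carries can corrupt neighbouring
// bytes because each partial sum is at most 32.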
//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
/*!
  f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
  All conversions to i64 are expanded to a libcall.
 */
static SDValue
LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
               SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
      || OpVT == MVT::i64) {
    // Convert f32 / f64 to i32 / i64 via libcall.
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::FP_TO_SINT)
       ? RTLIB::getFPTOSINT(Op0VT, OpVT)
       : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
/*!
  i32->f32 passes through unchanged, whereas i32->f64 is expanded to a
  libcall. All conversions from i64 are expanded to a libcall.
 */
static SDValue
LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
               SPUTargetLowering &TLI) {
  MVT OpVT = Op.getValueType();
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
      || Op0VT == MVT::i64) {
    // Convert i32, i64 to f64 via libcall:
    RTLIB::Libcall LC =
      (Op.getOpcode() == ISD::SINT_TO_FP)
       ? RTLIB::getSINTTOFP(Op0VT, OpVT)
       : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
    SDValue Dummy;
    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
  }

  return Op;
}
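// Both conversion routines lean on ExpandLibCall (defined near the top of
// this file) rather than an open-coded sequence; RTLIB resolves each type
// pair to the usual compiler-rt/libgcc entry points, e.g. __fixdfsi for
// signed f64->i32 and __floatdidf for signed i64->f64, which the SPU
// runtime is expected to provide.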
//! Lower ISD::SETCC
/*!
 This handles MVT::f64 (double floating point) condition lowering
 */
static SDValue
LowerSETCC(SDValue Op, SelectionDAG &DAG,
           const TargetLowering &TLI) {
  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
  DebugLoc dl = Op.getDebugLoc();
  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");

  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  MVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");

  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
  MVT IntVT(MVT::i64);

  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
  // selected to a NOP:
  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
  SDValue lhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64lhs, DAG.getConstant(32, MVT::i32)));
  SDValue lhsHi32abs =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
  SDValue lhsLo32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);

  // SETO and SETUO only use the lhs operand:
  if (CC->get() == ISD::SETO) {
    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
    // SETUO:
    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
    return DAG.getNode(ISD::XOR, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhs, DAG.getConstantFP(0.0, lhsVT),
                                    ISD::SETUO),
                       DAG.getConstant(ccResultAllOnes, ccResultVT));
  } else if (CC->get() == ISD::SETUO) {
    // Evaluates to true if Op0 is [SQ]NaN
    return DAG.getNode(ISD::AND, dl, ccResultVT,
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsHi32abs,
                                    DAG.getConstant(0x7ff00000, MVT::i32),
                                    ISD::SETGE),
                       DAG.getSetCC(dl, ccResultVT,
                                    lhsLo32,
                                    DAG.getConstant(0, MVT::i32),
                                    ISD::SETGT));
  }

  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
  SDValue rhsHi32 =
    DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
                DAG.getNode(ISD::SRL, dl, IntVT,
                            i64rhs, DAG.getConstant(32, MVT::i32)));

  // If a value is negative, subtract from the sign magnitude constant:
  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);

  // Convert the sign-magnitude representation into 2's complement:
  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      lhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
  SDValue lhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                lhsSelectMask, lhsSignMag2TC, i64lhs);

  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
                                      rhsHi32, DAG.getConstant(31, MVT::i32));
  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
  SDValue rhsSelect =
    DAG.getNode(ISD::SELECT, dl, IntVT,
                rhsSelectMask, rhsSignMag2TC, i64rhs);

  unsigned compareOp;

  switch (CC->get()) {
  case ISD::SETOEQ:
  case ISD::SETUEQ:
    compareOp = ISD::SETEQ; break;
  case ISD::SETOGT:
  case ISD::SETUGT:
    compareOp = ISD::SETGT; break;
  case ISD::SETOGE:
  case ISD::SETUGE:
    compareOp = ISD::SETGE; break;
  case ISD::SETOLT:
  case ISD::SETULT:
    compareOp = ISD::SETLT; break;
  case ISD::SETOLE:
  case ISD::SETULE:
    compareOp = ISD::SETLE; break;
  case ISD::SETONE:
  case ISD::SETUNE:
    compareOp = ISD::SETNE; break;
  default:
    cerr << "CellSPU ISel Select: unimplemented f64 condition\n";
    abort();
    break;
  }

  SDValue result =
    DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
                 (ISD::CondCode) compareOp);

  if ((CC->get() & 0x8) == 0) {
    // Ordered comparison:
    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
                                  ISD::SETO);
    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);

    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
  }

  return result;
}
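// The bit-trick underlying the general case: IEEE-754 doubles order like
// sign-magnitude integers, so a negative value compares "backwards" when
// viewed as a plain i64. Rewriting each negative operand x as
// 0x8000000000000000 - x converts sign-magnitude to two's complement, after
// which a single ordinary signed i64 setcc gives the right ordering. The
// final (CC->get() & 0x8) == 0 test works because, in ISD::CondCode's
// encoding, that bit distinguishes the unordered predicates (SETU*) from
// the ordered ones, and ordered results must also reject NaN operands.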
//! Lower ISD::SELECT_CC
/*!
  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
  SELB instruction.

  \note Need to revisit this in the future: if the code path through the true
  and false value computations is longer than the latency of a branch (6
  cycles), then it would be more advantageous to branch and insert a new basic
  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
 */
static SDValue
LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
               const TargetLowering &TLI) {
  MVT VT = Op.getValueType();
  SDValue lhs = Op.getOperand(0);
  SDValue rhs = Op.getOperand(1);
  SDValue trueval = Op.getOperand(2);
  SDValue falseval = Op.getOperand(3);
  SDValue condition = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  // NOTE: SELB's arguments: $rA, $rB, $mask
  //
  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. CCond will be inverted, having 1s where the
  // condition was true and 0s where the condition was false. Hence, the
  // arguments to SELB get reversed.

  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
  // with another "cannot select select_cc" assert:

  SDValue compare = DAG.getNode(ISD::SETCC, dl,
                                TLI.getSetCCResultType(Op.getValueType()),
                                lhs, rhs, condition);
  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
}
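// SELB semantics, concretely: selb $rT, $rA, $rB, $rC computes
// ($rA & ~$rC) | ($rB & $rC), bit by bit. A setcc result is all-ones for
// true and all-zeros for false, so SELB(falseval, trueval, compare) yields
// trueval exactly when the comparison held, giving a branch-free select in
// one instruction.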
//! Custom lower ISD::TRUNCATE
static SDValue
LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
{
  // Type to truncate to
  MVT VT = Op.getValueType();
  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
  DebugLoc dl = Op.getDebugLoc();

  // Type to truncate from
  SDValue Op0 = Op.getOperand(0);
  MVT Op0VT = Op0.getValueType();

  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
    // Create shuffle mask, least significant doubleword of quadword
    unsigned maskHigh = 0x08090a0b;
    unsigned maskLow = 0x0c0d0e0f;
    // Use a shuffle to perform the truncation
    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32),
                                   DAG.getConstant(maskHigh, MVT::i32),
                                   DAG.getConstant(maskLow, MVT::i32));

    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                       Op0, Op0, shufMask);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
  }

  return SDValue();             // Leave the truncate unmolested
}
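// Mask arithmetic for the i128 -> i64 case above: on the big-endian SPU, the
// least significant doubleword of a quadword occupies bytes 8..15, which is
// exactly what the control words 0x08090a0b and 0x0c0d0e0f name. Repeating
// the pair in both halves of the mask copies those bytes into the register's
// preferred slot (bytes 0..7), where VEC2PREFSLOT can read the i64 result.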
//! Custom (target-specific) lowering entry point
/*!
  This is where LLVM's DAG selection process calls to do target-specific
  lowering of nodes.
 */
SDValue
SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
{
  unsigned Opc = (unsigned) Op.getOpcode();
  MVT VT = Op.getValueType();

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    Op.getNode()->dump();
    abort();
  }
  case ISD::LOAD:
    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::STORE:
    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::ConstantFP:
    return LowerConstantFP(Op, DAG);
  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
  case ISD::CALL:
    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
  case ISD::RET:
    return LowerRET(Op, DAG, getTargetMachine());

  // i8, i64 math ops:
  case ISD::ADD:
  case ISD::SUB:
  case ISD::ROTR:
  case ISD::ROTL:
  case ISD::SRL:
  case ISD::SHL:
  case ISD::SRA: {
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;
  }

  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    return LowerFP_TO_INT(Op, DAG, *this);

  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return LowerINT_TO_FP(Op, DAG, *this);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:
    return LowerBUILD_VECTOR(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:
    return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:
    return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);

  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return LowerByteImmed(Op, DAG);

  // Vector and i8 multiply:
  case ISD::MUL:
    if (VT == MVT::i8)
      return LowerI8Math(Op, DAG, Opc, *this);
    break;

  case ISD::CTPOP:
    return LowerCTPOP(Op, DAG);

  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG, *this);

  case ISD::SETCC:
    return LowerSETCC(Op, DAG, *this);

  case ISD::TRUNCATE:
    return LowerTRUNCATE(Op, DAG);
  }

  return SDValue();
}
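// Dispatch note: LowerOperation is only invoked for opcodes whose action was
// registered as "Custom" for the relevant type in this target's constructor;
// returning a non-null SDValue splices the replacement DAG in place of the
// original node, while the byte-immediate cases may simply return Op
// unchanged to accept the operation as already legal.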
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
  TargetMachine &TM = getTargetMachine();
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode> (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With: (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode> (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With: (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith: ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith: ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With: (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith: ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
    break;
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    MVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}
// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // SPU's addresses are 256K:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
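// Range note: 1 << 18 is 262144, the size of the 256KB SPU local store, so
// the test accepts any offset whose magnitude stays inside the local store
// rather than one tied to a particular instruction's immediate field; the
// selected load/store form still decides how the offset is actually encoded.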
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.