//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//
#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
  cl::desc("enable preincrement load/store generation on PPC (experimental)"),
  cl::Hidden);
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);
  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
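
  // For illustration only: marking PRE_INC as Legal here tells the DAG
  // combiner it may try to form pre-incremented memory operations for these
  // types; whether a particular address is actually usable in that form is
  // decided later by getPreIndexedAddressParts() in this file.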
  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  // If we're enabling GP optimizations, use hardware square root
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);
  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET, MVT::Other, Custom);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // VAARG is custom lowered with the ELF 32 ABI
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }
  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }
  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType (ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType (ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType (ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType (ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }
  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrOneBooleanContent);

  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }
  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  computeRegisterProperties();
}
/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
  TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  return 4;
}
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  }
}
MVT PPCTargetLowering::getSetCCResultType(MVT VT) const {
  return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}
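
// For illustration: the two-operand form above accepts the byte mask
// <1,3,5,...,31>, i.e. the odd (low, in big-endian halfwords) byte of each
// halfword of the concatenated inputs, which is the result vpkuhum produces.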
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
        return false;
  }
  return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
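
// For illustration: with UnitSize==1, LHSStart==8 and RHSStart==24 (the
// two-operand low-merge case used below), the mask checked is
// <8,24, 9,25, 10,26, ..., 15,31>, i.e. the interleaving performed by vmrglb.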
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}
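
// For illustration: a fully-defined mask of <3,4,5,...,18> is consecutive
// starting at element 3, so the routine above returns 3 (a vsldoi by 3
// bytes); an all-undef mask returns -1.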
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
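
// For illustration: with EltSize==4, the byte mask <8,9,10,11> repeated four
// times is accepted (a splat of word 2 of the input), and getVSPLTImmediate
// below returns 8/4 == 2 for it.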
/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  unsigned Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
    ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}
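
// For illustration: asked for a byte splat (ByteSize==1) of the v4i32 value
// 0x01010101, the halving loop above reduces the value to 0x01, so the
// routine returns 1 and the splat can be built with a single vspltisb 1.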
//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
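
// For illustration: an i32 constant 0xFFFFFFF0 yields Imm == -16 and returns
// true (the value round-trips through a signed 16-bit immediate), while
// 0x12345 does not fit and returns false.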
/// SelectAddressRegReg - Given the specified addressed, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          APInt::getAllOnesValue(N.getOperand(0)
                                                 .getValueSizeInBits()),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            APInt::getAllOnesValue(N.getOperand(1)
                                                   .getValueSizeInBits()),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
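
// For illustration: the ISD::OR case fires on addresses like
// (or (shl X, 16), Y) where Y is known to have its high bits zero -- every
// bit is known zero on at least one side, so the OR behaves exactly like an
// ADD and the address can still be selected in [r+r] form.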
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as a signed displacement off of register zero.
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}
/// SelectAddressRegRegOnly - Given the specified addressed, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPC::R0, N.getValueType());
  Index = N;
  return true;
}
/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as a scaled displacement off of register zero.
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getTargetNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}
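
// For illustration: a word-aligned offset such as 40 is encoded above as the
// scaled displacement 40 >> 2 == 10, matching the DS-form used by std/ld,
// whose 14-bit displacement field is implicitly multiplied by 4.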
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  // Disabled by default for now.
  if (!EnablePPCPreinc) return false;

  SDValue Ptr;
  MVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
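
// For illustration: when this hook succeeds, the selection DAG rewrites a
// load/store whose address is (add R, imm) into a pre-incremented form that
// also produces the updated pointer, which isel can then match to the
// update-form instructions (e.g. lwzu/stwu, or ldu/stdu for i64).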
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, CPI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  return Lo;
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug loc here
  DebugLoc dl = Op.getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, JTI, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  return Lo;
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDValue(); // Not reached
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there isn't really any debug info here
  DebugLoc dl = GSDN->getDebugLoc();

  const TargetMachine &TM = DAG.getTarget();

  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, dl, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg,
                                 DebugLoc::getUnknownLoc(), PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, dl, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Lo, NULL, 0);
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  DebugLoc dl = Op.getDebugLoc();

  // If we're comparing for equality to zero, expose the fact that this is
  // implented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      int VarArgsFrameIndex,
                                      int VarArgsStackOffset,
                                      unsigned VarArgsNumGPR,
                                      unsigned VarArgsNumFPR,
                                      const PPCSubtarget &Subtarget) {
  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
  return SDValue(); // Not reached
}
SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  DebugLoc dl = Op.getDebugLoc();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = (PtrVT == MVT::i64);
  const Type *IntPtrTy =
    DAG.getTargetLoweringInfo().getTargetData()->getIntPtrType();

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  std::pair<SDValue, SDValue> CallResult =
    LowerCallTo(Chain, Op.getValueType().getTypeForMVT(), false, false,
                false, false, CallingConv::C, false,
                DAG.getExternalSymbol("__trampoline_setup", PtrVT),
                Args, DAG, dl);

  SDValue Ops[] =
    { CallResult.first, CallResult.second };

  return DAG.getMergeValues(Ops, 2, dl);
}
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {
  DebugLoc dl = Op.getDebugLoc();

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0);
  }

  // For the ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDValue ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDValue StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDValue FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore = DAG.getStore(Op.getOperand(0), dl, ArgGPR,
                                    Op.getOperand(1), SV, 0);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
    DAG.getStore(firstStore, dl, ArgFPR, nextPtr, SV, nextOffset);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore =
    DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, SV, nextOffset);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr, SV, nextOffset);
}
#include "PPCGenCallingConv.inc"

/// GetFPR - Get the set of FP registers that should be allocated for
/// arguments, depending on which subtarget is selected.
static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
  if (Subtarget.isMachoABI()) {
    static const unsigned FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
    return FPR;
  }

  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };
  return FPR;
}
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(SDValue Arg, ISD::ArgFlagsTy Flags,
                                       bool isVarArg, unsigned PtrByteSize) {
  MVT ArgVT = Arg.getValueType();
  unsigned ArgSize = ArgVT.getSizeInBits()/8;
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  return ArgSize;
}
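
// For illustration: a 3-byte by-value argument is rounded up to one full
// pointer-sized slot, so it reserves 4 bytes when PtrByteSize is 4 and
// 8 bytes when PtrByteSize is 8.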
1400 PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op
,
1402 int &VarArgsFrameIndex
,
1403 int &VarArgsStackOffset
,
1404 unsigned &VarArgsNumGPR
,
1405 unsigned &VarArgsNumFPR
,
1406 const PPCSubtarget
&Subtarget
) {
1407 // TODO: add description of PPC stack frame format, or at least some docs.
1409 MachineFunction
&MF
= DAG
.getMachineFunction();
1410 MachineFrameInfo
*MFI
= MF
.getFrameInfo();
1411 MachineRegisterInfo
&RegInfo
= MF
.getRegInfo();
1412 SmallVector
<SDValue
, 8> ArgValues
;
1413 SDValue Root
= Op
.getOperand(0);
1414 bool isVarArg
= cast
<ConstantSDNode
>(Op
.getOperand(2))->getZExtValue() != 0;
1415 DebugLoc dl
= Op
.getDebugLoc();
1417 MVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy();
1418 bool isPPC64
= PtrVT
== MVT::i64
;
1419 bool isMachoABI
= Subtarget
.isMachoABI();
1420 bool isELF32_ABI
= Subtarget
.isELF32_ABI();
1421 // Potential tail calls could cause overwriting of argument stack slots.
1422 unsigned CC
= MF
.getFunction()->getCallingConv();
1423 bool isImmutable
= !(PerformTailCallOpt
&& (CC
==CallingConv::Fast
));
1424 unsigned PtrByteSize
= isPPC64
? 8 : 4;
1426 unsigned ArgOffset
= PPCFrameInfo::getLinkageSize(isPPC64
, isMachoABI
);
1427 // Area that is at least reserved in caller of this function.
1428 unsigned MinReservedArea
= ArgOffset
;
1430 static const unsigned GPR_32
[] = { // 32-bit registers.
1431 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
1432 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
1434 static const unsigned GPR_64
[] = { // 64-bit registers.
1435 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
1436 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
1439 static const unsigned *FPR
= GetFPR(Subtarget
);
1441 static const unsigned VR
[] = {
1442 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
1443 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
1446 const unsigned Num_GPR_Regs
= array_lengthof(GPR_32
);
1447 const unsigned Num_FPR_Regs
= isMachoABI
? 13 : 8;
1448 const unsigned Num_VR_Regs
= array_lengthof( VR
);
1450 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
1452 const unsigned *GPR
= isPPC64
? GPR_64
: GPR_32
;
1454 // In 32-bit non-varargs functions, the stack space for vectors is after the
1455 // stack space for non-vectors. We do not use this space unless we have
1456 // too many vectors to fit in registers, something that only occurs in
1457 // constructed examples:), but we have to walk the arglist to figure
1458 // that out...for the pathological case, compute VecArgOffset as the
1459 // start of the vector parameter area. Computing VecArgOffset is the
1460 // entire point of the following loop.
1461 // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
1462 // to handle Elf here.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues()-1; ArgNo != e;
         ++ArgNo) {
      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      ISD::ArgFlagsTy Flags =
        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        ObjSize = Flags.getByValSize();
        unsigned ArgSize =
          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT()) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += isPPC64 ? 8 : 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
    // We've found where the vector parameter area in memory is.  Skip the
    // first 12 parameters; these don't use that memory.
    VecArgOffset = ((VecArgOffset+15)/16)*16;
    VecArgOffset += 12*16;
  }
  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.
  //
  // In the ELF 32 ABI, GPRs and the stack are double-word aligned: an
  // argument represented with two words (long long or double) must be copied
  // to an even GPR_idx value or to an even ArgOffset value.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();
    // See if next argument requires stack alignment in ELF
    bool Align = Flags.isSplit();

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64-bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                  Flags, isVarArg,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo),
                                                Flags, isVarArg,
                                                PtrByteSize);
    // FIXME alignment for ELF may not be right
    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Double word align in ELF
      if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgValues.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
          SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                            NULL, 0,
                                            ObjSize==1 ? MVT::i8 : MVT::i16);
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgVal will be address of the beginning of
        // the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }
    switch (ObjectVT.getSimpleVT()) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      if (!isPPC64) {
        // Double word align in ELF
        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i32);
          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // Stack align in ELF
        if (needsLoad && Align && isELF32_ABI)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        // All int arguments reserve stack space in Macho ABI.
        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32) {
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          if (Flags.isSExt())
            ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));
          else if (Flags.isZExt())
            ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));

          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }
        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available
      // for argument passing.
      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // Stack align in ELF
      if (needsLoad && Align && isELF32_ABI)
        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
      // All FP arguments reserve stack space in Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
      break;

    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
        RegInfo.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
    }

    ArgValues.push_back(ArgVal);
  }
  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack frame size.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  // Add the Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }
  MinReservedArea =
    std::max(MinReservedArea,
             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
    getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
  FI->setMinReservedArea(MinReservedArea);
  // If the function takes variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int depth;
    if (isELF32_ABI) {
      VarArgsNumGPR = GPR_idx;
      VarArgsNumFPR = FPR_idx;

      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
      // pointer.
      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
                PtrVT.getSizeInBits()/8);

      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                                  ArgOffset);
    } else
      depth = ArgOffset;

    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                               depth);
    SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

    // In ELF 32 ABI, the fixed integer arguments of a variadic function are
    // stored to the VarArgsFrameIndex on the stack.
    if (isELF32_ABI) {
      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
        SDValue Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by four for the next argument to store
        SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
      }
    }
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;
      if (isPPC64)
        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
      else
        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
      SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
    // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    if (isELF32_ABI) {
      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
        SDValue Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
        SDValue Store = DAG.getStore(Root, dl, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
      }

      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
        unsigned VReg;
        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        SDValue Val = DAG.getCopyFromReg(Root, dl, VReg, MVT::f64);
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store
        SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                         PtrVT);
        FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
      }
    }
  }

  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, dl,
                       MVT::Other, &MemOps[0], MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
/// linkage area for the call.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                     bool isPPC64,
                                     bool isMachoABI,
                                     bool isVarArg,
                                     unsigned CC,
                                     CallSDNode *TheCall,
                                     unsigned &nAltivecParamsAtEnd) {
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned NumOps = TheCall->getNumArgs();
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);
    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
    MVT ArgVT = Arg.getValueType();
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(Arg, Flags, isVarArg, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.  Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // Tail call needs the stack to be aligned.
  if (CC==CallingConv::Fast && PerformTailCallOpt) {
    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
      getStackAlignment();
    unsigned AlignMask = TargetAlign-1;
    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  }

  return NumBytes;
}
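// A rough worked example of the computation above (illustrative only, and
// assuming getMinCallFrameSize is the linkage area plus room for 8 saved
// GPRs): a 32-bit Macho call to f(int, double) starts from the 24-byte
// linkage area and adds 4 + 8 bytes of parameter space, for 36 bytes; the
// std::max with the minimum call frame size (24 + 8*4 = 56) then raises the
// final NumBytes to 56.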
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
                                   unsigned ParamSize) {
  if (!IsTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return.  A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(CallSDNode *TheCall,
                                                     SDValue Ret,
                                                     SelectionDAG& DAG) const {
  // Variable argument functions are not supported.
  if (!PerformTailCallOpt || TheCall->isVarArg())
    return false;

  if (CheckTailCallReturnConstraints(TheCall, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = TheCall->getCallingConv();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      // Functions containing by val parameters are not supported.
      for (unsigned i = 0; i != TheCall->getNumArgs(); i++) {
        ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
        if (Flags.isByVal()) return false;
      }

      SDValue Callee = TheCall->getCallee();
      // Non PIC/GOT tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return true;

      // At the moment we can only do local tail calls (in same module, hidden
      // or protected) if we are generating PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr)
    return 0;  // Top 6 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getZExtValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}
struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int     FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDValue Chain,
                                  const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                                  SmallVector<SDValue, 8> &MemOpChains,
                                  DebugLoc dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
                                       PseudoSourceValue::getFixedStack(FI),
                                       0));
  }
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                             MachineFunction &MF,
                                             SDValue Chain,
                                             SDValue OldRetAddr,
                                             SDValue OldFP,
                                             int SPDiff,
                                             bool isPPC64,
                                             bool isMachoABI,
                                             DebugLoc dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
                                                                   isMachoABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc);
    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
                                                                    isMachoABI);
    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);

    MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         PseudoSourceValue::getFixedStack(NewRetAddr), 0);
    SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
    Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                         PseudoSourceValue::getFixedStack(NewFPIdx), 0);
  }
  return Chain;
}
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
  MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
/// stack slot. Returns the chain as result and the loaded frame pointers in
/// LROpOut/FPOpout. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG& DAG,
                                                        int SPDiff,
                                                        SDValue Chain,
                                                        SDValue &LROpOut,
                                                        SDValue &FPOpOut,
                                                        DebugLoc dl) {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0);
    Chain = SDValue(LROpOut.getNode(), 1);
    FPOpOut = getFramePointerFrameIndex(DAG);
    FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0);
    Chain = SDValue(FPOpOut.getNode(), 1);
  }
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          unsigned Size, DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Size, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, NULL, 0, NULL, 0);
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
/// of tail calls.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments,
                 DebugLoc dl) {
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}
SDValue PPCTargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG,
                                     const PPCSubtarget &Subtarget,
                                     TargetMachine &TM) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  bool isVarArg = TheCall->isVarArg();
  unsigned CC = TheCall->getCallingConv();
  bool isTailCall = TheCall->isTailCall()
                 && CC == CallingConv::Fast && PerformTailCallOpt;
  SDValue Callee = TheCall->getCallee();
  unsigned NumOps = TheCall->getNumArgs();
  DebugLoc dl = TheCall->getDebugLoc();

  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDValue> args_to_use;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (PerformTailCallOpt && CC==CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC,
                                         TheCall, nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = isMachoABI ? 13 : 8;
  const unsigned NumVRs  = array_lengthof(VR);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    bool inMem = false;
    SDValue Arg = TheCall->getArg(i);
    ISD::ArgFlagsTy Flags = TheCall->getArgFlags(i);
    // See if next argument requires stack alignment in ELF
    bool Align = Flags.isSplit();

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // Stack align in ELF 32
    if (isELF32_ABI && Align)
      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
                               StackPtr.getValueType());
    else
      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME Elf untested, what are alignment rules?
    // FIXME memcpy is used way more than necessary.  Correctness first.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (Size==1 || Size==2) {
        // Very small objects are passed right-justified.
        // Everything else is passed left-justified.
        MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        NULL, 0, VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
                                CallSeqStart.getNode()->getOperand(0),
                                Flags, DAG, Size, dl);
          // This must go outside the CALLSEQ_START..END.
          SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                               CallSeqStart.getNode()->getOperand(1));
          DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                                 NewCallSeqStart.getNode());
          Chain = CallSeqStart = NewCallSeqStart;
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                            CallSeqStart.getNode()->getOperand(0),
                            Flags, DAG, Size, dl);
      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.getNode()->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;
      // And copy the pieces of it that fit into registers.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }
    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      // Double word align in ELF
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

        ArgOffset += PtrByteSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
            ++GPR_idx;
        }
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        if (isPPC64)
          ArgOffset += 8;
        else
          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = TheCall->getArg(i);
      MVT ArgType = Arg.getValueType();
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        SDValue PtrOff;
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
    }
  }
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
  if (isVarArg && isELF32_ABI) {
    SDValue SetCR(DAG.getTargetNode(PPC::CRSET, dl, MVT::i32), 0);
    Chain = DAG.getCopyToReg(Chain, dl, PPC::CR1EQ, SetCR, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  if (isTailCall) {
    SmallVector<SDValue, 8> MemOpChains2;
    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                      MemOpChains2, dl);
    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                          isPPC64, isMachoABI, dl);
  }

  // Emit callseq_end just before tailcall node.
  if (isTailCall) {
    Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               DAG.getIntPtrConstant(0, true), InFlag);
    InFlag = Chain.getValue(1);
  }
  std::vector<MVT> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = isMachoABI ? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDValue(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};
    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
                        2 + (InFlag.getNode() != 0));
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12/X12 on darwin.
    if (isMachoABI) {
      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
      Chain = DAG.getCopyToReg(Chain, dl, Reg, Callee, InFlag);
      InFlag = Chain.getValue(1);
    }

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.push_back(Chain);
    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;

    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (isTailCall) {
    assert(InFlag.getNode() &&
           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(PPCISD::TAILCALL, dl,
                        TheCall->getVTList(), &Ops[0], Ops.size());
    return SDValue(Chain.getNode(), Op.getResNo());
  }
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SmallVector<SDValue, 16> ResultVals;
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeCallResult(TheCall, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT VT = VA.getValVT();
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    InFlag = Chain.getValue(2);
  }

  // If the function returns void, just return the chain.
  if (RVLocs.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl, TheCall->getVTList(),
                            &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.getResNo());
}
SDValue PPCTargetLowering::LowerRET(SDValue Op, SelectionDAG &DAG,
                                    TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  DebugLoc dl = Op.getDebugLoc();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Chain = Op.getOperand(0);

  Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
  if (Chain.getOpcode() == PPCISD::TAILCALL) {
    SDValue TailCall = Chain;
    SDValue TargetAddress = TailCall.getOperand(1);
    SDValue StackAdjustment = TailCall.getOperand(2);

    assert(((TargetAddress.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(TargetAddress)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDValue,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call. Last operand is a flag so it is not
    // copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Operands[0],
                       Operands.size());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, Chain);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
                                             const PPCSubtarget &Subtarget) {
  // When we pop the dynamic allocation we need to restore the SP link.
  DebugLoc dl = Op.getDebugLoc();

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool IsPPC64 = Subtarget.isPPC64();
  unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, NULL, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, NULL, 0);
}
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!RASI) {
    // Find out what the fixed offset of the return address save area is.
    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64 ? 8 : 4, LROffset);
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}
SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fixed offset of the frame pointer save area is.
    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);

    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64 ? 8 : 4, FPOffset);
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG,
                                                   const PPCSubtarget &Subtarget) {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3);
}
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDValue();

  MVT ResVT = Op.getValueType();
  MVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  DebugLoc dl = Op.getDebugLoc();

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return SDValue();
}
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDValue PPCTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG,
                                           DebugLoc dl) {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getValueType().getSimpleVT()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDValue FIPtr = DAG.CreateStackTemporary(MVT::f64);

  // Emit a store to the stack slot.
  SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, NULL, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  if (Op.getValueType() == MVT::i32)
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
  return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, NULL, 0);
}
SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue Bits = DAG.getNode(ISD::BIT_CONVERT, dl,
                               MVT::f64, Op.getOperand(0));
    SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
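  // Roughly, the machine code this is aiming for is (sketch only, register
  // and stack-slot names chosen for illustration):
  //   extsw  r4, r3        ; sign-extend the i32 into a 64-bit GPR
  //   std    r4, slot(r1)  ; store the whole doubleword to a stack slot
  //   lfd    f1, slot(r1)  ; reload it into an FPR
  //   fcfid  f1, f1        ; convert the 64-bit integer to double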
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ext64 = DAG.getNode(PPCISD::EXTSW_32, dl, MVT::i32,
                              Op.getOperand(0));

  // STD the extended value into the stack slot.
  MachineMemOperand MO(PseudoSourceValue::getFixedStack(FrameIdx),
                       MachineMemOperand::MOStore, 0, 8, 8);
  SDValue Store = DAG.getNode(PPCISD::STD_32, dl, MVT::Other,
                              DAG.getEntryNode(), Ext64, FIdx,
                              DAG.getMemOperand(MO));
  // Load the value as a double.
  SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, NULL, 0);

  // FCFID it and return it.
  SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

   To perform the conversion, we do:
     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
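  // Sanity check of the expression above (illustrative): FPSCR&3 == 00
  // (nearest) gives 0^1 = 1, 01 (toward zero) gives 1^1 = 0, 10 (toward
  // +inf) gives 2^0 = 2, and 11 (toward -inf) gives 3^0 = 3, matching the
  // FLT_ROUNDS encoding listed above.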
  MachineFunction &MF = DAG.getMachineFunction();
  MVT VT = Op.getValueType();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  std::vector<MVT> NodeTys;
  SDValue MFFSreg, InFlag;

  // Save FP Control Word to register
  NodeTys.push_back(MVT::f64);    // return register
  NodeTys.push_back(MVT::Flag);   // unused in this context
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
                               StackSlot, NULL, 0);

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, NULL, 0);

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  DebugLoc dl = Op.getDebugLoc();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
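  // A concrete reading of the expansion below (illustrative, assuming the PPC
  // shift nodes produce 0 for oversized amounts, which is what the note above
  // relies on): for BitWidth == 32,
  //   OutHi = (Hi << Amt) | (Lo >> (32 - Amt)) | (Lo << (Amt - 32))
  //   OutLo = Lo << Amt
  // Exactly one of the Lo terms survives: Amt == 8 gives
  // OutHi = (Hi << 8) | (Lo >> 24), while Amt == 40 gives OutHi = Lo << 8
  // and OutLo = 0.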
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  DebugLoc dl = Op.getDebugLoc();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  MVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2, dl);
}
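// Note on the select above (illustrative): Tmp5 is Amt - BitWidth.  While it
// is <= 0 the whole shift lands in the composed value Tmp4; once it is
// positive, the low half is Hi >>s (Amt - BitWidth), i.e. Tmp6, which also
// propagates the sign bits of Hi.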
//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, MVT VT,
                           SelectionDAG &DAG, DebugLoc dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  MVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDValue Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType());
  SmallVector<SDValue, 8> Ops;
  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT,
                            &Ops[0], Ops.size());
  return DAG.getNode(ISD::BIT_CONVERT, dl, ReqVT, Res);
}
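// Example (illustrative): BuildSplatI(-5, 2, MVT::Other, DAG, dl) builds a
// v8i16 BUILD_VECTOR of eight -5 constants, which the instruction selector
// matches as a single "vspltish -5"; the result is then bitcast to the
// requested type (here the canonical v8i16 itself, since VT was MVT::Other).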
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, DebugLoc dl,
                                MVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG,
                                DebugLoc dl, MVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt,
                           MVT VT, SelectionDAG &DAG, DebugLoc dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
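// Illustrative note: with LHS == RHS the shuffle built above is a left
// rotation of the vector by Amt bytes, which is what the AltiVec vsldoi
// instruction computes when both sources are the same register; the selector
// is expected to match the v16i8 mask <Amt, Amt+1, ..., Amt+15> to vsldoi.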
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs) || SplatBitSize > 32)
    return SDValue();

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, MVT::i32);
      Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z);
      Op = DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //    tmp = VSPLTI[bhw], result = add tmp, tmp
  if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
    SDValue Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG, dl);
    Res = DAG.getNode(ISD::ADD, dl, Res.getValueType(), Res, Res);
    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (i << (int)TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl);
    }
  }

  // Three instruction sequences.

  // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
  if (SextVal >= 0 && SextVal <= 31) {
    SDValue LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG, dl);
    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
    LHS = DAG.getNode(ISD::SUB, dl, LHS.getValueType(), LHS, RHS);
    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
  }
  // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
  if (SextVal >= -31 && SextVal <= 0) {
    SDValue LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG, dl);
    SDValue RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG, dl);
    LHS = DAG.getNode(ISD::ADD, dl, LHS.getValueType(), LHS, RHS);
    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), LHS);
  }

  return SDValue();
}
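// Worked example for the "two instruction" add-self case above (illustrative):
// a v8i16 splat of 20 has SextVal == 20, which is even and in [-32,30], so it
// becomes t = vspltish 10 followed by an add of t to itself (selected as
// vadduhm), avoiding a constant pool load.  The shift-self cases are similar:
// a v8i16 splat of 0x00A0 (== 5 << 5) becomes vspltish 5 followed by vslh t,t,
// since each element then shifts itself left by (5 & 15) bits.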
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      DebugLoc dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  MVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BIT_CONVERT, dl, VT, T);
}
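// Note on the encoding used above (illustrative): a PFEntry packs a cost in
// bits 31:30, an operation number in bits 29:26, and two 13-bit operand ids.
// Each operand id is four 4-byte-element selectors written in base 9, where
// digits 0..7 pick an element of the (LHS,RHS) pair and 8 means undef; thus
// (1*9+2)*9+3 encodes <0,1,2,3> (the identity of LHS) and ((4*9+5)*9+6)*9+7
// encodes <4,5,6,7> (the identity of RHS), the two inputs OP_COPY accepts.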
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  MVT VT = Op.getValueType();

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, true) ||
        PPC::isVPKUHUMShuffleMask(SVOp, true) ||
        PPC::isVSLDOIShuffleMask(SVOp, true) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, true) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, true) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, true) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, true) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, true) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, true)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(SVOp, false) ||
      PPC::isVPKUHUMShuffleMask(SVOp, false) ||
      PPC::isVSLDOIShuffleMask(SVOp, false) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, false) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, false) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, false) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, false) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, false) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, false))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  SmallVector<int, 16> PermMask;
  SVOp->getMask(PermMask);

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {    // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }

  SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                  &ResultMask[0], ResultMask.size());
  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask);
}
/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc,
                                  bool &isDot) {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();

  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }
  return true;
}
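// Illustrative note: CompareOpc is the opcode field of the matching AltiVec
// vector-compare instruction (134 selects vcmpequw, for example), and isDot
// chooses the record ("dot") form, which additionally summarizes the compare
// result in CR6 so the *_p predicate intrinsics can read it back with MFCR.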
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  DebugLoc dl = Op.getDebugLoc();
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, MVT::i32)
  };
  std::vector<MVT> VTs;
  VTs.push_back(Op.getOperand(2).getValueType());
  VTs.push_back(MVT::Flag);
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl,
                               Op.getOperand(0), FIdx, NULL, 0);
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, NULL, 0);
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v16i8, OddParts);

    // Merge the results together.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      Ops[i*2  ] = 2*i;
      Ops[i*2+1] = 2*i+1+16;
    }
    return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    assert(0 && "Unknown mul to lower!");
  }
}
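// Worked example for the v4i32 path above (illustrative): writing each 32-bit
// lane as a = ah*2^16 + al and b = bh*2^16 + bl, the product mod 2^32 is
//   a*b = al*bl + ((al*bh + ah*bl) << 16).
// vmulouh computes al*bl for each lane (LoProd), and vmsumuhm applied to LHS
// and the halfword-rotated RHS computes al*bh + ah*bl (HiProd), which is then
// shifted up 16 bits and added to LoProd.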
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::TRAMPOLINE:         return LowerTRAMPOLINE(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                        VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                      VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
                                 VarArgsStackOffset, VarArgsNumGPR,
                                 VarArgsNumFPR, PPCSubTarget);

  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget,
                                                 getTargetMachine());
  case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);

  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG,
                                                       Op.getDebugLoc());
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
  return SDValue();
}
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) {
  DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default:
    assert(false && "Do not know how to custom type legalize this operation!");
    return;
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1));

    // This sequence changes FPSCR to do round-to-zero, adds the two halves
    // of the long double, and puts FPSCR back the way it was.  We do not
    // actually model FPSCR.
    std::vector<MVT> NodeTys;
    SDValue Ops[4], Result, MFFSreg, InFlag, FPreg;

    NodeTys.push_back(MVT::f64);    // Return register
    NodeTys.push_back(MVT::Flag);   // Returns a flag for later insns
    Result = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
    MFFSreg = Result.getValue(0);
    InFlag = Result.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Flag);   // Returns a flag
    Ops[0] = DAG.getConstant(31, MVT::i32);
    Ops[1] = InFlag;
    Result = DAG.getNode(PPCISD::MTFSB1, dl, NodeTys, Ops, 2);
    InFlag = Result.getValue(0);

    NodeTys.clear();
    NodeTys.push_back(MVT::Flag);   // Returns a flag
    Ops[0] = DAG.getConstant(30, MVT::i32);
    Ops[1] = InFlag;
    Result = DAG.getNode(PPCISD::MTFSB0, dl, NodeTys, Ops, 2);
    InFlag = Result.getValue(0);

    NodeTys.clear();
    NodeTys.push_back(MVT::f64);    // result of add
    NodeTys.push_back(MVT::Flag);   // Returns a flag
    Ops[0] = Lo;
    Ops[1] = Hi;
    Ops[2] = InFlag;
    Result = DAG.getNode(PPCISD::FADDRTZ, dl, NodeTys, Ops, 3);
    FPreg = Result.getValue(0);
    InFlag = Result.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::f64);
    Ops[0] = DAG.getConstant(1, MVT::i32);
    Ops[1] = MFFSreg;
    Ops[2] = FPreg;
    Ops[3] = InFlag;
    Result = DAG.getNode(PPCISD::MTFSF, dl, NodeTys, Ops, 4);
    FPreg = Result.getValue(0);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
    Results.push_back(LowerFP_TO_SINT(SDValue(N, 0), DAG, dl));
    return;
  }
}
//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    bool is64bit, unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->transferSuccessors(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(
       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                 (const TargetRegisterClass *) &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
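// Illustrative note: the loop block above is the standard PPC load-reserve /
// store-conditional idiom.  For a 32-bit atomic add it comes out roughly as
//   loop: lwarx  rD, rA, rB
//         add    rT, rI, rD
//         stwcx. rT, rA, rB
//         bne-   cr0, loop
// where the stwcx. fails (and the branch retries) whenever the reservation
// taken by the lwarx was lost in the meantime.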
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = PPCSubTarget.isPPC64();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->transferSuccessors(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
              (const TargetRegisterClass *) &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift

  if (ptrA!=PPC::R0) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
    .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
    .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
    .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
    .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(PPC::R0).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(PPC::R0).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(BB, dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg).addReg(ShiftReg);
  return BB;
}
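// Worked example of the shift bookkeeping above (illustrative, assuming the
// usual big-endian PPC byte numbering): for an i8 at ptr1 = 0x1001,
// "rlwinm shift1, ptr1, 3, 27, 28" computes (ptr1 & 3) << 3 = 8, and
// "xori shift, shift1, 24" turns that into 16, because byte 1 of a
// big-endian word occupies bits 16..23 when the word is loaded as an i32.
// "rlwinm ptr, ptr1, 0, 0, 29" then clears the low two bits, giving the
// aligned word address 0x1000 that lwarx/stwcx. operate on.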
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
      MI->getOpcode() == PPC::SELECT_CC_I8 ||
      MI->getOpcode() == PPC::SELECT_CC_F4 ||
      MI->getOpcode() == PPC::SELECT_CC_F8 ||
      MI->getOpcode() == PPC::SELECT_CC_VRRC) {

    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned SelectPred = MI->getOperand(4).getImm();
    DebugLoc dl = MI->getDebugLoc();
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);
    // Update machine-CFG edges by transferring all successors of the current
    // block to the new block which will contain the Phi node for the select.
    sinkMBB->transferSuccessors(BB);
    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(BB, dl, TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->transferSuccessors(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = PPCSubTarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->transferSuccessors(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval,shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA!=PPC::R0) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
      .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
      .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
      .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
      .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(PPC::R0).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
      .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
      .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
      .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
      .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
      .addReg(PPC::R0).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
      .addReg(PPC::R0).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(BB, dl, TII->get(PPC::SRW),dest).addReg(TmpReg).addReg(ShiftReg);
  } else {
    assert(0 && "Unexpected instr type to insert");
  }

  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
  return BB;
}
4377 //===----------------------------------------------------------------------===//
4378 // Target Optimization Hooks
4379 //===----------------------------------------------------------------------===//
4381 SDValue
PPCTargetLowering::PerformDAGCombine(SDNode
*N
,
4382 DAGCombinerInfo
&DCI
) const {
4383 TargetMachine
&TM
= getTargetMachine();
4384 SelectionDAG
&DAG
= DCI
.DAG
;
4385 DebugLoc dl
= N
->getDebugLoc();
4386 switch (N
->getOpcode()) {
4389 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(0))) {
4390 if (C
->getZExtValue() == 0) // 0 << V -> 0.
4391 return N
->getOperand(0);
4395 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(0))) {
4396 if (C
->getZExtValue() == 0) // 0 >>u V -> 0.
4397 return N
->getOperand(0);
4401 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(0))) {
4402 if (C
->getZExtValue() == 0 || // 0 >>s V -> 0.
4403 C
->isAllOnesValue()) // -1 >>s V -> -1.
4404 return N
->getOperand(0);
4408 case ISD::SINT_TO_FP
:
4409 if (TM
.getSubtarget
<PPCSubtarget
>().has64BitSupport()) {
4410 if (N
->getOperand(0).getOpcode() == ISD::FP_TO_SINT
) {
4411 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
4412 // We allow the src/dst to be either f32/f64, but the intermediate
4413 // type must be i64.
4414 if (N
->getOperand(0).getValueType() == MVT::i64
&&
4415 N
->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128
) {
4416 SDValue Val
= N
->getOperand(0).getOperand(0);
4417 if (Val
.getValueType() == MVT::f32
) {
4418 Val
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Val
);
4419 DCI
.AddToWorklist(Val
.getNode());
4422 Val
= DAG
.getNode(PPCISD::FCTIDZ
, dl
, MVT::f64
, Val
);
4423 DCI
.AddToWorklist(Val
.getNode());
4424 Val
= DAG
.getNode(PPCISD::FCFID
, dl
, MVT::f64
, Val
);
4425 DCI
.AddToWorklist(Val
.getNode());
4426 if (N
->getValueType(0) == MVT::f32
) {
4427 Val
= DAG
.getNode(ISD::FP_ROUND
, dl
, MVT::f32
, Val
,
4428 DAG
.getIntPtrConstant(0));
4429 DCI
.AddToWorklist(Val
.getNode());
4432 } else if (N
->getOperand(0).getValueType() == MVT::i32
) {
4433 // If the intermediate type is i32, we can avoid the load/store here
4440 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
4441 if (TM
.getSubtarget
<PPCSubtarget
>().hasSTFIWX() &&
4442 !cast
<StoreSDNode
>(N
)->isTruncatingStore() &&
4443 N
->getOperand(1).getOpcode() == ISD::FP_TO_SINT
&&
4444 N
->getOperand(1).getValueType() == MVT::i32
&&
4445 N
->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128
) {
4446 SDValue Val
= N
->getOperand(1).getOperand(0);
4447 if (Val
.getValueType() == MVT::f32
) {
4448 Val
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Val
);
4449 DCI
.AddToWorklist(Val
.getNode());
4451 Val
= DAG
.getNode(PPCISD::FCTIWZ
, dl
, MVT::f64
, Val
);
4452 DCI
.AddToWorklist(Val
.getNode());
4454 Val
= DAG
.getNode(PPCISD::STFIWX
, dl
, MVT::Other
, N
->getOperand(0), Val
,
4455 N
->getOperand(2), N
->getOperand(3));
4456 DCI
.AddToWorklist(Val
.getNode());
4460 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
4461 if (N
->getOperand(1).getOpcode() == ISD::BSWAP
&&
4462 N
->getOperand(1).getNode()->hasOneUse() &&
4463 (N
->getOperand(1).getValueType() == MVT::i32
||
4464 N
->getOperand(1).getValueType() == MVT::i16
)) {
4465 SDValue BSwapOp
= N
->getOperand(1).getOperand(0);
4466 // Do an any-extend to 32-bits if this is a half-word input.
4467 if (BSwapOp
.getValueType() == MVT::i16
)
4468 BSwapOp
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, BSwapOp
);
4470 return DAG
.getNode(PPCISD::STBRX
, dl
, MVT::Other
, N
->getOperand(0),
4471 BSwapOp
, N
->getOperand(2), N
->getOperand(3),
4472 DAG
.getValueType(N
->getOperand(1).getValueType()));
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      std::vector<MVT> VTs;
      VTs.push_back(MVT::i32);
      VTs.push_back(MVT::Other);
      SDValue MO = DAG.getMemOperand(LD->getMemOperand());
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        MO,                // MemOperand
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad = DAG.getNode(PPCISD::LBRX, dl, VTs, Ops, 4);

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away; we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;

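  // The PPCISD::VCMP combine below is a limited form of CSE: if an identical
  // record-form compare (VCMPo) already exists, its value result is reused so
  // only one vector compare instruction is emitted.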
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }

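  // A typical source for the ISD::BR_CC combine below (illustrative, not from
  // this file) is "if (vec_any_eq(a, b))": the front end emits an AltiVec
  // predicate intrinsic whose i32 result is compared against 0 or 1 and
  // branched on, and the combine branches on CR6 directly instead of copying
  // the condition register out with mfcr.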
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<MVT> VTs;
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

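// computeMaskedBitsForTargetNode reports which bits of a target-specific
// node's result are known to be zero or one: the byte-reversed halfword load
// (lhbrx) clears the top 16 bits, and the AltiVec predicate intrinsics only
// ever produce 0 or 1.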
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

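// The mapping below follows the GCC rs6000 register-constraint letters: 'b'
// and 'r' select GPRs (G8RC for i64 on a 64-bit subtarget), 'f' selects an
// FPR class based on the value type, 'v' selects VRRC, and 'y' selects CRRC.
// For example (illustrative only):
//   asm ("add %0, %1, %2" : "=r"(d) : "r"(a), "r"(b));
// constrains all three operands to GPRs.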
std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, PPC::G8RCRegisterClass);
      return std::make_pair(0U, PPC::GPRCRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, PPC::F4RCRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, PPC::F8RCRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, PPC::VRRCRegisterClass);
    case 'y':   // crrc
      return std::make_pair(0U, PPC::CRRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

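// A typical use of the immediate-constraint letters handled below
// (illustrative only):
//   asm ("addi %0, %1, %2" : "=r"(d) : "r"(a), "I"(16));
// 'I' only matches when the constant fits in a signed 16-bit field, which is
// exactly the range addi accepts.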
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.  If hasMemory is true,
/// it means one of the asm constraints of the inline asm instruction being
/// processed is 'm'.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, char Letter,
                                                     bool hasMemory,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0,0);
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    unsigned Value = CST->getZExtValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if ((short)Value == (int)Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, hasMemory, Ops, DAG);
}

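// The only load/store addressing modes PPC supports are register+register
// (indexed, e.g. lwzx) and register plus a signed 16-bit displacement (e.g.
// lwz r3, 8(r4)), so isLegalAddressingMode rejects global bases, out-of-range
// offsets, and any scale other than 2*r, which it treats as r+r.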
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing beyond the immediate forms:
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // PPC allows a sign-extended 16-bit immediate field.
  return (V > -(1 << 16) && V < (1 << 16)-1);
}

bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

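// RETURNADDR is lowered by reloading the saved link register from its stack
// slot; setLRStoreRequired keeps that store from being optimized away.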
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
    return SDValue();

  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);

  // Make sure the function really does not optimize away the store of the RA
  // to the stack.
  FuncInfo->setLRStoreRequired();
  return DAG.getLoad(getPointerTy(), dl,
                     DAG.getEntryNode(), RetAddrFI, NULL, 0);
}

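// FRAMEADDR reads the frame register directly: R31/X31 when the function
// keeps a frame pointer (frame-pointer elimination disabled or variable-sized
// objects present, with a non-empty stack frame), otherwise the stack pointer
// R1/X1.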
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
  DebugLoc dl = Op.getDebugLoc();
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() > 0)
    return SDValue();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) &&
              MFI->getStackSize();

  if (isPPC64)
    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::X31 : PPC::X1,
                              MVT::i64);

  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, is31 ? PPC::R31 : PPC::R1,
                            MVT::i32);
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.