//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

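// These are ordinary llvm::cl flags, so each can be toggled by name on the
// tool command line; for example (hypothetical invocation, for illustration
// only):
//   llc -mtriple=powerpc64le-unknown-linux-gnu -disable-ppc-preinc foo.ll
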
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::SPE4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to the customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP needs to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
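  // (That is, when both a/b and a%b are needed, the remainder can be formed
  // from the quotient with one multiply and one subtract:
  //   a % b == a - (a / b) * b.)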
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use the vector BSWAP instruction
  // xxbrd to speed up scalar BSWAP64.
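  // (Roughly: move the 64-bit GPR value into a vector register, byte-reverse
  // it with xxbrd, and move it back; see the custom lowering for the exact
  // instruction sequence used.)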
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);

  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have SELECT.
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP.
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP as supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
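  // (These nodes back the @llvm.eh.sjlj.setjmp and @llvm.eh.sjlj.longjmp
  // intrinsics, which is how front ends reach this lowering.)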
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget.isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);
      setOperationAction(ISD::ABS, VT, Custom);

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integers that fit in an Altivec/VSX
    // register are cheap, so handle them before they get expanded to scalar.
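    // (For example, a v8i16 -> v8i8 truncate keeps its result in a single
    // vector register, so it can be done with one permute-style operation
    // instead of eight scalar truncations.)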
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
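    // (The custom lowering relies on the identity abs(x) == smax(x, 0 - x).)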
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128-bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However, due to direct move costs, it's not
        // worth doing.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
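      // (For example, a v4i16 uint_to_fp can widen its input inside a vector
      // register and convert directly, rather than extracting four elements
      // and converting each one in scalar code.)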
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128-bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops on PowerPC.
        setOperationAction(ISD::FSIN, MVT::f128, Expand);
        setOperationAction(ISD::FCOS, MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v4f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG, MVT::v4f64, Legal);
    setOperationAction(ISD::FABS, MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND, MVT::v4i1, Legal);
    setOperationAction(ISD::OR, MVT::v4i1, Legal);
    setOperationAction(ISD::XOR, MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::STORE, MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
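    // (Unlike nearbyint, rint must raise the FE_INEXACT floating-point
    // exception whenever the result differs from its operand, so each element
    // has to go through the scalar, libm-conformant path.)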
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  if (EnableQuadPrecision) {
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::REM_F128, "fmodf128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(2);
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(4);

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefFunctionAlignment(4);
    setPrefLoopAlignment(4);
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}
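
// For example, for a struct such as { int x; <4 x i32> v; }, the vector
// member drives MaxAlign up to 16 (32 when QPX is available), which is then
// the value getByValTypeAlignment (below) returns for a byval argument of
// that type.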

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

unsigned PPCTargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
                                                          CallingConv::ID CC,
                                                          EVT VT) const {
  if (Subtarget.hasSPE() && VT == MVT::f64)
    return 2;
  return PPCTargetLowering::getNumRegisters(Context, VT);
}

MVT PPCTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                     CallingConv::ID CC,
                                                     EVT VT) const {
  if (Subtarget.hasSPE() && VT == MVT::f64)
    return MVT::i32;
  return PPCTargetLowering::getRegisterType(Context, VT);
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((PPCISD::NodeType)Opcode) {
  case PPCISD::FIRST_NUMBER: break;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
  case PPCISD::FP_TO_UINT_IN_VSR: return "PPCISD::FP_TO_UINT_IN_VSR";
  case PPCISD::FP_TO_SINT_IN_VSR: return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE: return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL: return "PPCISD::VECSHL";
  case PPCISD::CMPB: return "PPCISD::CMPB";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::MFVSR: return "PPCISD::MFVSR";
  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT: return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT: return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
  case PPCISD::STXSIX: return "PPCISD::STXSIX";
  case PPCISD::VEXTS: return "PPCISD::VEXTS";
  case PPCISD::SExtVElems: return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
  case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB: return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD: return "PPCISD::VABSD";
  case PPCISD::QVFPERM: return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI: return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI: return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI: return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT: return "PPCISD::QBFLT";
  case PPCISD::QVLFSb: return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
  }
  return nullptr;
}

EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
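// For example, with ShuffleKind 0 (big endian, two distinct inputs) the only
// mask this predicate accepts is
//   {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31}
// i.e. the odd-numbered byte of each halfword across both concatenated
// inputs, which is exactly the result vpkuhum produces.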
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
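// For example, with ShuffleKind 0 (big endian, two distinct inputs) this
// accepts exactly the mask
//   {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}
// i.e. the low-order halfword of every word across both concatenated inputs.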
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i   ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2 ), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3 ), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8 ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9 ), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
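// For example, isVMerge(N, 1, 8, 24) checks for the big-endian vmrglb
// pattern, which interleaves the low halves of the two inputs byte by byte:
//   {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}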
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target machine.
 *   - Little Endian:
 *     - Use offset of 0 to check for odd elements
 *     - Use offset of 4 to check for even elements
 *   - Big Endian:
 *     - Use offset of 0 to check for even elements
 *     - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * ("Targeting your applications - what little endian and big endian IBM XL
 * C/C++ compiler differences mean to you").
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements
 * are numbered in array-access order, starting with the first vector. These
 * vectors are always of type v16i8, thus each vector will contain 16 elements
 * of size 8 bits. More info on the shufflevector instruction can be found in
 * the Language Reference:
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of
 *     the indices will be 0 to 15. In this case, the RHSStart value passed
 *     should be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices
 *     16 to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand
 *            input vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  // Check the word positions in both halves of the mask.
  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped
 *     for little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents an even or odd word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2).  For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;
  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
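// For example, the big-endian two-input mask {3, 4, 5, ..., 18} selects 16
// consecutive bytes of the concatenated inputs starting at byte 3, so this
// returns 3 (the SHB operand for vsldoi). On little-endian targets the
// amount is reported as 16 - ShiftAmt to account for the swapped operands.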
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
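// For example, with EltSize == 4 the mask
//   {4,5,6,7, 4,5,6,7, 4,5,6,7, 4,5,6,7}
// is accepted: every word of the result is word 1 of the first input, which
// vspltw can produce directly.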
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
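// For example, isNByteElemShuffleMask(N, 2, -1) accepts
//   {1,0, 3,2, 5,4, 7,6, 9,8, 11,10, 13,12, 15,14}
// since each 2-byte element covers one halfword with its byte indices
// consecutive in decreasing order.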
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
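// In the checks above, M0-M3 are the word-granularity indices (0-7 across
// the two concatenated inputs) of the four result words; a mask qualifies
// for XXINSERTW when exactly one result word deviates from the identity
// pattern 0,1,2,3 (or from 4,5,6,7 with the inputs swapped).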
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
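// For example, isXXBRWShuffleMask accepts exactly
//   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}
// i.e. the bytes of each word reversed, which is what xxbrw computes.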
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
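// For example, on big-endian the two-input byte mask {0..7, 24..31} gives
// M0 = 0 and M1 = 3, so Swap stays false and DM = (0 << 1) + (3 & 1) = 1:
// the result takes doubleword 0 of the first input and doubleword 1 of the
// second.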
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize,
                                SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
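// For example, for the EltSize == 4 splat mask {4,5,6,7, 4,5,6,7, ...} this
// returns 1 on big-endian targets but (16/4) - 1 - 1 = 2 on little-endian
// targets, where the hardware numbers vector elements from the other end.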
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                 // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                          // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
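// For example, a v16i8 build_vector of sixteen 3s queried with ByteSize == 1
// yields the constant 3 (materializable as vspltisb 3), while sixteen 0s are
// rejected at the MaskVal check by design: all-zero splats are matched by
// ISD::isBuildVectorAllZeros and materialized with vxor instead.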
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}
//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}

bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
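// For example, an i32 constant 0xFFFF8000 yields Imm = -32768 and returns
// true, while 0x00008000 (32768) does not survive the 16-bit round-trip and
// returns false.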
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
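// The [r+r] form selected here corresponds to the X-form (indexed)
// instructions such as lwzx/stwx, whereas the [r+imm] form rejected above is
// handled by the D-form instructions such as lwz/stw.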
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Align = MFI.getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p Alignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            unsigned Alignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Alignment || (imm % Alignment) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Alignment || (imm % Alignment) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) && (!Alignment || (Imm % Alignment) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Alignment || (CN->getZExtValue() % Alignment) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add.  However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register.  We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N) {
  if (!N->hasOneUse())
    return false;

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.  Currently, only check for i64 since we have lxsd/lfd to do this
  // efficiently, but no update equivalent.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    EVT MemVT = LD->getMemoryVT();
    if (MemVT.isSimple() && MemVT.getSimpleVT().SimpleTy == MVT::i64) {
      SDNode *User = *(LD->use_begin());
      if (User->getOpcode() == ISD::SCALAR_TO_VECTOR)
        return true;
    }
  }

  return false;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead (such as LXSD).
  if (isLoad && usePartialVectorLoads(N)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
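// The pre-increment forms selected here correspond to the update-form
// instructions (e.g. lwzu/stwu for r+i and lwzux/stwux for r+r), which write
// the computed address back into the base register.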
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
/// Set the HiOpFlags and LoOpFlags to the target MO flags for a label
/// reference, adding the PIC flag when we should reference labels using a
/// PICBase.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  if (IsPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr,
  // make sure that instruction lowering adds it.
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
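// The PPCISD::Hi/Lo pair built here typically selects to a two-instruction
// sequence such as "lis r, sym@ha" (or an addis off the PIC base) followed
// by "addi r, r, sym@l".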
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}

static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}

static SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, bool Is64Bit,
                           SDValue GA) {
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT) :
                DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);

  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
      MachineMemOperand::MOLoad);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return getTOCEntry(DAG, SDLoc(CP), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), false, GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// on the jump site.
unsigned PPCTargetLowering::getJumpTableEncoding() const {
  if (isJumpTableRelative())
    return MachineJumpTableInfo::EK_LabelDifference32;

  return TargetLowering::getJumpTableEncoding();
}

bool PPCTargetLowering::isJumpTableRelative() const {
  if (Subtarget.isPPC64())
    return true;
  return TargetLowering::isJumpTableRelative();
}

SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}

const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), false, GA);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.isSVR4ABI() &&
      (Subtarget.isPPC64() || isPositionIndependent())) {
    if (Subtarget.isPPC64())
      setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), Subtarget.isPPC64(), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else
      GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
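// For reference, the 64-bit local-exec case above produces the usual
//   addis rX, r13, sym@tprel@ha
//   addi  rX, rX, sym@tprel@l
// sequence, with r13 (X13) serving as the thread pointer.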
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, true, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, false, GA);
  }

  SDValue GAHi =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
      DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do
  // an extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  return Ptr;
}
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
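// Worked example of the xor rewrite above: (seteq i32 %a, %b) becomes
// (seteq (xor %a, %b), 0). lowerCmpEqZeroToCtlzSrl and the DAG combiner can
// then turn the compare-against-zero into roughly a cntlzw followed by a
// shift right by 5 (ctlz/srl for a 32-bit value) instead of reading a
// condition register back into a GPR.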
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg,
                               OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
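// Byte layout of the 32-bit SVR4 va_list walked above, relative to VAListPtr:
// offset 0 = gpr count, 1 = fpr count, 4 = overflow_arg_area pointer,
// 8 = reg_save_area pointer. The FPR slots begin 32 bytes into the register
// save area, after the eight 4-byte GPR slots, which is why a floating-point
// VT adds the constant 32 to OurReg.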
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
                       false, MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
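// The net effect is a runtime libcall such as (32-bit case, illustrative):
//   __trampoline_setup(trmp, 40, nested_fn, nest_val);
// which writes a small code stub into 'trmp' that materializes the static
// chain ('nest') value before branching to the nested function.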
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
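// The four chained stores above fill in the va_list fields at byte offsets
// 0 (gpr count), 1 (fpr count), 4 (overflow_arg_area) and 8 (reg_save_area),
// matching the struct layout documented at the top of this function and the
// offsets that LowerVAARG reads back.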
/// FPR - The set of FP registers that should be allocated for arguments,
/// on Darwin.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}
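// Worked example of the rounding above: a 5-byte byval argument with an
// 8-byte pointer size reserves ((5 + 8 - 1) / 8) * 8 = 8 bytes, while an
// array member (isInConsecutiveRegs) stays packed at its exact store size.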
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                            ISD::ArgFlagsTy Flags,
                                            unsigned PtrByteSize) {
  unsigned Align = PtrByteSize;

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Align = 16;
  // QPX vector types stored in double-precision are padded to a 32 byte
  // boundary.
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
    Align = 32;

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    unsigned BVAlign = Flags.getByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign % PtrByteSize != 0)
          llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Align = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Align = OrigVT.getStoreSize();
    else
      Align = ArgVT.getStoreSize();
  }

  return Align;
}
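// Worked examples for the rules above: a v4f32 Altivec parameter is aligned
// to 16 even with an 8-byte pointer size; a byval with a requested 32-byte
// alignment yields 32; and the first register of a split array member is
// realigned to the store size of the full original type.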
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  unsigned TargetAlign = Lowering->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}
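// Worked example of the mask rounding above: with a 16-byte target stack
// alignment, NumBytes = 100 becomes (100 + 15) & ~15 = 112. This round-up
// idiom assumes TargetAlign is a power of two.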
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
}
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  if (useSoftFloat() || hasSPE())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::SPE4RCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::SPERCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual
      // ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                            ValVT == MVT::i1 ? MVT::i32
                                                             : ValVT);

      if (ValVT == MVT::i1)
        ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack frame.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
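// Example: a 'signext i32' formal argument arrives in the low half of an i64
// GPR; the helper above wraps it as (truncate (AssertSext val, i32)) so later
// DAG combines know the upper 32 bits already hold a valid sign extension and
// can elide redundant extend operations.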
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.
  //
  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack frame.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack frame.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
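// Worked example: if the caller's minimum reserved area is 112 bytes and the
// tail call needs 144 bytes of parameter space, SPDiff = 112 - 144 = -32,
// i.e. the stack must grow by 32 bytes before the tail-call jump; the most
// negative delta seen so far is remembered on PPCFunctionInfo.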
static bool isFunctionGlobalAddress(SDValue Callee);

static bool
callsShareTOCBase(const Function *Caller, SDValue Callee,
                  const TargetMachine &TM) {
  // If !G, Callee can be an external symbol.
  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
  if (!G)
    return false;

  // The medium and large code models are expected to provide a sufficiently
  // large TOC to provide all data addressing needs of a module with a
  // single TOC. Since each module will be addressed with a single TOC then we
  // only need to check that caller and callee don't cross dso boundaries.
  if (CodeModel::Medium == TM.getCodeModel() ||
      CodeModel::Large == TM.getCodeModel())
    return TM.shouldAssumeDSOLocal(*Caller->getParent(), G->getGlobal());

  // Otherwise we need to ensure callee and caller are in the same section,
  // since the linker may allocate multiple TOCs, and we don't know which
  // sections will belong to the same TOC base.

  const GlobalValue *GV = G->getGlobal();
  if (!GV->isStrongDefinitionForLinker())
    return false;

  // Any explicitly-specified sections and section prefixes must also match.
  // Also, if we're using -ffunction-sections, then each function is always in
  // a different section (the same is true for COMDAT functions).
  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
      GV->getSection() != Caller->getSection())
    return false;
  if (const auto *F = dyn_cast<Function>(GV)) {
    if (F->getSectionPrefix() != Caller->getSectionPrefix())
      return false;
  }

  // If the callee might be interposed, then we can't assume the ultimate call
  // target will be in the same section. Even in cases where we can assume that
  // interposition won't happen, in any case where the linker might insert a
  // stub to allow for interposition, we must generate code as though
  // interposition might occur. To understand why this matters, consider a
  // situation where: a -> b -> c where the arrows indicate calls. b and c are
  // in the same section, but a is in a different module (i.e. has a different
  // TOC base pointer). If the linker allows for interposition between b and c,
  // then it will generate a stub for the call edge between b and c which will
  // save the TOC pointer into the designated stack slot allocated by b. If we
  // return true here, and therefore allow a tail call between b and c, that
  // stack slot won't exist and the b -> c stub will end up saving b's TOC base
  // pointer into the stack slot allocated by a (where the a -> b stub saved
  // a's TOC base pointer). If we're not considering a tail call, but rather,
  // whether a nop is needed after the call instruction in b, because the linker
  // will insert a stub, it might complain about a missing nop if we omit it
  // (although many don't complain in this case).
  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
    return false;

  return true;
}
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.isSVR4ABI() && Subtarget.isPPC64());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}
// Returns true if TCO is possible between the callers and callees
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC  = [] (CallingConv::ID CC){
      return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
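// Summary of the rule above, as caller -> callee pairs: C -> C and C -> Fast
// are eligible, Fast -> Fast is eligible, but Fast -> C is rejected because a
// fastcc caller may have reserved less incoming-argument stack space than a
// ccc callee would expect to reuse.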
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // A caller that takes any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // Nor is a callee that takes any byval parameter.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   struct test {
  //     long int a;
  //     char ary[56];
  //   } gTest;
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     b->a = v.a;
  //     return 0;
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // No TCO/SCO on indirect call because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // If the caller and callee potentially have different TOC bases then we
  // cannot tail call since we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list as the caller, then we can
  // apply SCO in this case. If not, then we need to check whether the callee
  // needs stack for passing arguments.
  if (!hasSameArgumentList(&Caller, CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing byval parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
      ISD::ArgFlagsTy Flags = Ins[i].Flags;
      if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}
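// Worked example (illustrative only): Addr = 0x1000 passes both checks (its
// low two bits are clear and it is unchanged by 26-bit sign extension), so the
// returned immediate is 0x1000 >> 2 == 0x400. Addr = 0x1002 fails the low-bit
// check, and Addr = 0x8000000 fails the 26-bit sign-extension check.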
namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
                                                         true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(
                               DAG.getMachineFunction(), NewFPIdx));
    }
  }
  return Chain;
}
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
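// Illustrative arithmetic (a sketch, not upstream code): with ArgOffset = 48
// and SPDiff = -32 (the callee needs 32 more bytes of argument space than the
// caller reserved), the fixed object above lands at Offset = 48 - 32 = 16
// relative to the adjusted stack pointer.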
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result and the loaded frame
/// pointers in LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // PC-relative references to external symbols should go through $stub, unless
  // we're building with the leopard linker or later, which automatically
  // synthesizes these stubs.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;

  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
    // every direct call is) turn it into a TargetGlobalAddress /
    // TargetExternalSymbol node so that legalize doesn't hack it.
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (isPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.
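      // For reference, the ELFv1 function descriptor can be pictured as the
      // following struct (an illustrative sketch, not a type used here):
      //   struct FunctionDescriptor {
      //     uint64_t EntryPoint; // offset 0,  moved to CTR below
      //     uint64_t TOCBase;    // offset 8,  copied into X2 below
      //     uint64_t EnvPtr;     // offset 16, copied into X11 below
      //   };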
      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? (MachineMemOperand::MODereferenceable |
                             MachineMemOperand::MOInvariant)
                          : MachineMemOperand::MONone;

      MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      // Load the TOC of the callee from the second descriptor field.
      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);
        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer).
    if (isSVR4ABI && isPPC64 && !isELFv2ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call, add the stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in both the ELF V1 and V2 ABIs, need the TOC register live
  // into the call.
  // We do need to reserve X2 to appease the verifier for the PATCHPOINT.
  if (isSVR4ABI && isPPC64) {
    setUsesTOCBasePtr(DAG);

    // We cannot add X2 as an operand here for PATCHPOINT, because there is no
    // way to mark dependencies as implicit here. We will add the X2 dependency
    // in EmitInstrWithCustomInserter.
    if (!isPatchPoint)
      Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
  }

  return CallOpc;
}
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
      (CallConv == CallingConv::Fast &&
       getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }
  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI. At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged.
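  // Illustrative linker rewrite (a sketch; the actual save-slot offset comes
  // from PPCFrameLowering::getTOCSaveOffset(), 40 on ELFv1 and 24 on ELFv2):
  //   bl callee        ->  bl <toc-saving stub for callee>
  //   nop              ->  ld r2, 40(r1)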
  MachineFunction &MF = DAG.getMachineFunction();
  if (!isTailCall && Subtarget.isSVR4ABI() && Subtarget.isPPC64() &&
      !isPatchPoint) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      EVT PtrVT = getPointerTy(DAG.getDataLayout());
      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if (CallOpc == PPCISD::CALL &&
               !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  ImmutableCallSite CS = CLI.CS;

  if (isTailCall) {
    if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
          IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                   isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      LLVM_DEBUG(
          const GlobalValue *GV =
              cast<GlobalAddressSDNode>(Callee)->getGlobal();
          const unsigned Width =
              80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
          dbgs() << "TCO caller: "
                 << left_justify(DAG.getMachineFunction().getName(), Width)
                 << ", callee linkage: " << GV->getVisibility() << ", "
                 << GV->getLinkage() << "\n");
    }
  }

  if (!isTailCall && CS && CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into an address.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  if (Subtarget.isSVR4ABI()) {
    if (Subtarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, isPatchPoint, Outs, OutVals, Ins,
                              dl, DAG, InVals, CS);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
                                                 SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}
SDValue PPCTargetLowering::LowerCall_64SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned NumOps = Outs.size();
  bool hasNest = false;
  bool IsSibCall = false;

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  if (isTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
    IsSibCall = true;

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
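  // For reference, the offsets from SP work out as follows (an illustrative
  // sketch): on ELFv1 the back chain is at 0, CR save at 8, LR save at 16,
  // two reserved words at 24 and 32, and the TOC save slot at 40; on ELFv2
  // the TOC save slot moves to 24.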
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned NumQFPRs = NumFPRs;

  // On ELFv2, we can avoid allocating the parameter area if all the arguments
  // can be passed to the callee in registers.
  // For the fast calling convention, there is another check below.
  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
  bool HasParameterArea = !isELFv2ABI || isVarArg || CallConv == CallingConv::Fast;
  if (!HasParameterArea) {
    unsigned ParamAreaSize = NumGPRs * PtrByteSize;
    unsigned AvailableFPRs = NumFPRs;
    unsigned AvailableVRs = NumVRs;
    unsigned NumBytesTmp = NumBytes;
    for (unsigned i = 0; i != NumOps; ++i) {
      if (Outs[i].Flags.isNest()) continue;
      if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
                                 PtrByteSize, LinkageSize, ParamAreaSize,
                                 NumBytesTmp, AvailableFPRs, AvailableVRs,
                                 Subtarget.hasQPX()))
        HasParameterArea = true;
    }
  }

  // When using the fast calling convention, we don't provide backing for
  // arguments that will be in registers.
  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;

  // Avoid allocating parameter area for fastcc functions if all the arguments
  // can be passed in the registers.
  if (CallConv == CallingConv::Fast)
    HasParameterArea = false;

  // Add up all the space actually used.
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    if (Flags.isNest())
      continue;

    if (CallConv == CallingConv::Fast) {
      if (Flags.isByVal()) {
        NumGPRsUsed += (Flags.getByValSize()+7)/8;
        if (NumGPRsUsed > NumGPRs)
          HasParameterArea = true;
      } else {
        switch (ArgVT.getSimpleVT().SimpleTy) {
        default: llvm_unreachable("Unexpected ValueType for argument!");
        case MVT::i1:
        case MVT::i32:
        case MVT::i64:
          if (++NumGPRsUsed <= NumGPRs)
            continue;
          break;
        case MVT::v4i32:
        case MVT::v8i16:
        case MVT::v16i8:
        case MVT::v2f64:
        case MVT::v2i64:
        case MVT::v1i128:
        case MVT::f128:
          if (++NumVRsUsed <= NumVRs)
            continue;
          break;
        case MVT::v4f32:
          // When using QPX, this is handled like a FP register, otherwise, it
          // is an Altivec register.
          if (Subtarget.hasQPX()) {
            if (++NumFPRsUsed <= NumFPRs)
              continue;
          } else {
            if (++NumVRsUsed <= NumVRs)
              continue;
          }
          break;
        case MVT::f32:
        case MVT::f64:
        case MVT::v4f64: // QPX
        case MVT::v4i1:  // QPX
          if (++NumFPRsUsed <= NumFPRs)
            continue;
          break;
        }
        HasParameterArea = true;
      }
    }

    /* Respect alignment of argument on the stack.  */
    unsigned Align =
      CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
    NumBytes = ((NumBytes + Align - 1) / Align) * Align;

    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
    if (Flags.isInConsecutiveRegsLast())
      NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  }

  unsigned NumBytesActuallyUsed = NumBytes;

  // In the old ELFv1 ABI,
  // the prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed. As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  // In the ELFv2 ABI, we allocate the parameter area iff a callee
  // really requires memory operands, e.g. a vararg function.
  if (HasParameterArea)
    NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
  else
    NumBytes = LinkageSize;
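  // Worked numbers (illustrative only): when the parameter area is needed,
  // ELFv1 reserves at least 48 + 8 * 8 = 112 bytes and ELFv2 at least
  // 32 + 64 = 96 bytes, using the linkage sizes described above.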
  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  int SPDiff = 0;

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  if (!IsSibCall)
    SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory. Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    EVT OrigVT = Outs[i].ArgVT;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    auto ComputePtrOff = [&]() {
      /* Respect alignment of argument on the stack.  */
      unsigned Align =
        CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;

      PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    };

    if (CallConv != CallingConv::Fast) {
      ComputePtrOff();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, NumGPRs);
    }

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      if (CallConv == CallingConv::Fast)
        ComputePtrOff();

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }

      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
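        // Illustrative example (not upstream code): for a 3-byte aggregate on
        // a big-endian target, the memcpy below targets PtrOff + (8 - 3), so
        // the bytes land at the low-order end of the doubleword slot and the
        // 8-byte load that follows leaves them right-justified in the GPR.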
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        }
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load =
          DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
        hasNest = true;
        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      }
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64: {
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
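      // Illustrative count (a sketch, not upstream code): with the 13 FPRs
      // available for arguments here, a call passing 14 named doubles places
      // the first 13 in FPRs; the 14th lands in the parameter save area, since
      // by that offset the GPR-covered slots are exhausted as well.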
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
        ;
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.
        SDValue ArgVal;

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);

        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
            std::swap(Lo, Hi);
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.
        } else
          ArgVal = SDValue();

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

        NeededLoad = true;
      }
      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    }
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogeneous
      // vector aggregates.

      // For a varargs call, named arguments go into VRs or on the stack as
      // usual; unnamed arguments always go to the stack or the corresponding
      // GPRs when within range.  For now, we always put the value in both
      // locations (or even all three).
      if (isVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params go into VRs or on the stack.
      if (VR_idx != NumVRs) {
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += 16;
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += 16;
      break;
      } // not QPX

      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");

      LLVM_FALLTHROUGH;
    case MVT::v4f64:
    case MVT::v4i1: {
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
      if (isVarArg) {
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
                                     PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        }
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs QPX params go into registers or on the stack.
      if (QFPR_idx != NumQFPRs) {
        RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
      } else {
        if (CallConv == CallingConv::Fast)
          ComputePtrOff();

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);
      }

      if (CallConv != CallingConv::Fast)
        ArgOffset += (IsF32 ? 16 : 32);
      break;
      }
    }
  }
  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }
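  // For reference, the indirect-call sequence set up above typically becomes
  // something like the following on ELFv2 (the exact save slot offset comes
  // from getTOCSaveOffset(); 24 is illustrative):
  //   std 2, 24(1)    ; save the caller's TOC pointer
  //   mtctr 12        ; CTR = callee entry point
  //   bctrl           ; call through CTR
  //   ld 2, 24(1)     ; restore the caller's TOC pointer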
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
}
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a tail call.  As a
  // consequence, the frame pointer will be used for dynamic allocation and
  // for restoring the caller's stack pointer in this function's epilogue.
  // This is necessary because the called function might overwrite the value
  // in this function's (MF's) stack pointer save slot at 0(SP) when tail
  // calling.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it's varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
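  // Illustrative arithmetic for the bound above (not an extra constraint): a
  // 64-bit call with a single i64 argument would otherwise compute NumBytes =
  // LinkageSize (48 here) + 8, but std::max raises it to 48 + 8*8 = 112 bytes
  // so that the callee can always spill all 8 GPR argument registers.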
  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }
    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)  // PPC64 has 64-bit GPRs, obviously. :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ellipsis (...).  We do it
        // for all arguments; it seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, dl, PtrVT));
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load =
              DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }
      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }

  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                ? RetCC_PPC_Cold
                : RetCC_PPC);
}
SDValue
PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
                           ? RetCC_PPC_Cold
                           : RetCC_PPC);

  SDValue Flag;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[i];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    }

    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
}
SDValue
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc dl(Op);

  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
}
SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
}
SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current return address save index.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
  if (!RASI) {
    // Find out what the fixed offset of the return address save area is.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, LROffset, false);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDValue
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fixed offset of the frame pointer save area is.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Get the inputs.
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);
  SDLoc dl(Op);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  // Negate the size.
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
}
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();

  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
}

SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));
}

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));
}
SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");

  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  SDLoc dl(Op);
  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);
}
SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  SDLoc dl(Op);
  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
                      Value);
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
}

// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  SDLoc DL(Op);
  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
                     Op.getOperand(0));
}
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size).  At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  // operation.

  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  // undefined):
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  assert(Op.getValueType().isVector() && "Vector type expected.");

  SDLoc DL(Op);
  SDValue N1 = Op.getOperand(0);
  unsigned SrcSize = N1.getValueType().getSizeInBits();
  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  EVT TrgVT = Op.getValueType();
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // First list the elements we want to keep.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
  else
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    ShuffV.push_back(WideNumElts + 1);

  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
}
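// A worked mask example (derived from the code above, not an additional
// constraint): truncating v2i64 to v2i32 gives SizeMult = 2 and
// WideNumElts = 4, so ShuffV is {0, 2, 5, 5} on little-endian targets and
// {1, 3, 5, 5} on big-endian ones.  Indices >= 4 select from the UNDEF
// second operand and therefore act as don't-cares.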
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return Op;

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
    return Op;
  // TODO: Propagate flags from the select rather than global settings.
  SDNodeFlags Flags;
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);
  SDLoc dl(Op);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  SDValue Sel1;
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETNE:
      std::swap(TV, FV);
      LLVM_FALLTHROUGH;
    case ISD::SETEQ:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      LLVM_FALLTHROUGH;
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setgt
      LLVM_FALLTHROUGH;
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
    }

  SDValue Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETNE:
    std::swap(TV, FV);
    LLVM_FALLTHROUGH;
  case ISD::SETEQ:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
  }
  return Op;
}
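// An illustrative case of the mapping above (assuming finite-math): for
//   R = select_cc LHS, 0.0, TV, FV, setge
// the zero-RHS fast path emits a single
//   R = FSEL LHS, TV, FV
// since fsel natively selects its second operand when the first is >= 0.0.
// The non-zero-RHS paths materialize LHS - RHS (or RHS - LHS) first and
// feed that difference to fsel instead.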
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               SelectionDAG &DAG,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
    (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
  SDValue Chain;
  if (i32Stack) {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
  } else
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
  }

  RLI.Chain = Chain;
  RLI.Ptr = FIPtr;
  RLI.MPI = MPI;
}
/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  SDValue Tmp;
  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(
        Op.getOpcode() == ISD::FP_TO_SINT
            ? PPCISD::FCTIWZ
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
        dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    break;
  case MVT::i64:
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ,
                      dl, MVT::f64, Src);
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
    break;
  }
  return Tmp;
}
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {

  // FP to INT conversions are legal for f128.
  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))
    return Op;

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
    if (Op.getValueType() == MVT::i32) {
      if (Op.getOpcode() == ISD::FP_TO_SINT) {
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      }
      if (Op.getOpcode() == ISD::FP_TO_UINT) {
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        //  X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
                                   Op.getOperand(0), Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
                                    Op.getOperand(0));
        return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
                               ISD::SETGE);
      }
    }

    return SDValue();
  }

  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);

  ReuseLoadInfo RLI;
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
// We're trying to insert a regular store, S, and then a load, L. If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O. However, we don't know if anything else will store to
// that address before we can load from it. To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O. To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ReuseLoadInfo &RLI,
                                            SelectionDAG &DAG,
                                            ISD::LoadExtType ET) const {
  SDLoc dl(Op);
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {

    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
    return true;
  }

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
    return false;
  if (LD->getMemoryVT() != MemVT)
    return false;

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
                          LD->getOffset());
  }

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
  return true;
}
// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// factor.
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  if (!ResChain)
    return;

  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
}
/// Analyze the profitability of a direct move: prefer a float load over an
/// int load plus a direct move when the loaded integer value has no integer
/// uses.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)
    return true;

  // If there is no LXSIBZX/LXSIHZX, like on Power8,
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
    return true;

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
       UI != UE; ++UI) {

    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)
      continue;

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)
      return true;
  }

  return false;
}
/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    SelectionDAG &DAG,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");
  SDValue FP;
  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);

  if (WordInt) {
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
                     dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  } else {
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
  }

  return FP;
}
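// For example (machine code is illustrative; the exact instructions depend
// on the subtarget): a signed i64->f64 conversion with direct moves becomes
// roughly "mtvsrd" to move the GPR into a VSR followed by "fcfid" to
// convert in place, with no stack traffic, whereas the fallback path would
// store the GPR to the stack and reload it into an FPR first.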
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {

  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  EVT EltVT = VecVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumConcat);
  Ops[0] = Vec;
  SDValue UndefVec = DAG.getUNDEF(VecVT);
  for (unsigned i = 1; i < NumConcat; ++i)
    Ops[i] = UndefVec;

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
}
SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
  unsigned ExtendOp =
      SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;

  SDValue Extend;
  if (!Subtarget.hasP9Altivec() && SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(Op.getOperand(0).getValueType()));
  } else
    Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  EVT InVT = Op.getOperand(0).getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
    return Op;

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion and skip the
  // store/load; however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
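      // A small worked case of the twiddle above (illustrative only): for
      // SINT = 0x401 the low 11 bits are nonzero, so (SINT & 2047) + 2047
      // carries past bit 10; OR-ing with SINT and then clearing the low 11
      // bits (& -2048) yields 0x800, i.e. exactly the value-2048 sticky bit
      // the comment above calls for.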
    }

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }
  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                 PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}
SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
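  // Spot-checking the formula above (illustrative, not part of the original
  // comment): FPSCR 0b00 -> 0 ^ (3 >> 1) == 1 (nearest); 0b01 -> 1 ^ (2 >> 1)
  // == 0 (to zero); 0b10 -> 2 ^ (1 >> 1) == 2 (+inf); 0b11 -> 3 ^ (0 >> 1)
  // == 3 (-inf).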
  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDLoc dl(Op);

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}
SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
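  // Illustrative check of that behavior: with BitWidth == 32 and Amt == 40,
  // both Hi << 40 and Lo >> (32 - 40) use amounts the hardware treats as
  // "shift everything out", so they contribute 0 and OutHi reduces to
  // Lo << 8 -- the correct high word of a 64-bit shift by 40. For Amt == 5
  // it is the Lo << (5 - 32) term that vanishes instead.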
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
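  // Explanatory note: Tmp5 is Amt - BitWidth, so for amounts up to BitWidth
  // the OR-combined Tmp4 is selected, and for larger amounts the low word
  // must come entirely from the sign-filled Hi >> (Amt - BitWidth), i.e.
  // Tmp6.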
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
//===----------------------------------------------------------------------===//
// Vector related lowering.
//===----------------------------------------------------------------------===//
/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}
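// For example, BuildSplatI(-1, 4, MVT::v4i32, DAG, dl) is canonicalized to a
// byte splat: an all-ones pattern is identical for every element size, so a
// single vspltisb -1 also covers the halfword and word cases.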
/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}
/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}
/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
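// Usage note: BuildVSLDOI(V, V, 1, VT, DAG, dl) rotates V's sixteen bytes
// left by one position in the shuffle's element numbering; the splat
// lowerings below rely on this to synthesize byte-shifted splat patterns.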
/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a
  // constant splat. So a constant BUILD_VECTOR here means the vector is built
  // out of different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  return !(IsSplat && IsLoad);
}
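// E.g. a v4i32 whose operands are all the same loaded value is a
// load-and-splat: IsSplat and IsLoad both end up true, so we return false
// and expand, since a splatting-load sequence beats any BUILD_VECTOR
// pattern. (Illustrative reading of the logic above.)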
// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Op0 = Op->getOperand(0);

  if (!EnableQuadPrecision ||
      (Op.getValueType() != MVT::f128) ||
      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
      (Op0.getOperand(0).getValueType() != MVT::i64) ||
      (Op0.getOperand(1).getValueType() != MVT::i64))
    return SDValue();

  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
                     Op0.getOperand(1));
}
// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }

    SmallVector<SDValue, 4> Stores;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;

      unsigned Offset = 4*i;
      SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
      Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

      unsigned StoreSize = BVN->getOperand(i).getValueType().getStoreSize();
      if (StoreSize > 4) {
        Stores.push_back(
            DAG.getTruncStore(DAG.getEntryNode(), dl, BVN->getOperand(i), Idx,
                              PtrInfo.getWithOffset(Offset), MVT::i32));
      } else {
        SDValue StoreValue = BVN->getOperand(i);
        if (StoreSize < 4)
          StoreValue = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, StoreValue);

        Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, StoreValue, Idx,
                                      PtrInfo.getWithOffset(Offset)));
      }
    }

    SDValue StoreChain;
    if (!Stores.empty())
      StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);
    else
      StoreChain = DAG.getEntryNode();

    // Now load from v4i32 into the QPX register; this will extend it to
    // v4i64 but not yet convert it to a floating point. Nevertheless, this
    // is typed as v4f64 because the QPX register integer states are not
    // explicitly represented.

    SDValue Ops[] = {StoreChain,
                     DAG.getConstant(Intrinsic::ppc_qpx_qvlfiwz, dl, MVT::i32),
                     FIdx};
    SDVTList VTs = DAG.getVTList({MVT::v4f64, /*chain*/ MVT::Other});

    SDValue LoadedVect = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN,
      dl, VTs, Ops, MVT::v4i32, PtrInfo);
    LoadedVect = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
      DAG.getConstant(Intrinsic::ppc_qpx_qvfcfidu, dl, MVT::i32),
      LoadedVect);

    SDValue FPZeros = DAG.getConstantFP(0.0, dl, MVT::v4f64);

    return DAG.getSetCC(dl, MVT::v4i1, LoadedVect, FPZeros, ISD::SETEQ);
  }
  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32) {
    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }
  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIB for constant splats one byte wide.
  if (Subtarget.hasP9Vector() && SplatSize == 1) {
    // This is a splat of 1-byte elements with some elements potentially undef.
    // Rather than trying to match undef in the SDAG patterns, ensure that all
    // elements are the same constant.
    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
                                                       dl, MVT::i32));
      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
      if (Op.getValueType() != MVT::v16i8)
        return DAG.getBitcast(Op.getValueType(), NewBV);
      return NewBV;
    }

    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
    // detect that constant splats like v8i16: 0xABAB are really just splats
    // of a 1-byte constant. In this case, we need to convert the node to a
    // splat of v16i8 and a bitcast.
    if (Op.getValueType() != MVT::v16i8)
      return DAG.getBitcast(Op.getValueType(),
                            DAG.getConstant(SplatBits, dl, MVT::v16i8));
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
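  // Concretely (illustrative arithmetic): a splat of 18 becomes
  // vsplti(9) + vsplti(9), and a splat of 27 becomes
  // vsplti(27-16) - vsplti(-16), i.e. 11 - (-16) == 27.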
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }
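    // E.g. a byte splat of 0x40 is caught here with i == 4: vspltisb 4
    // followed by vslb with itself yields 4 << 4 == 0x40 in every lane.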
    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
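  // Explanatory note on the encoding: a table entry packs the cost into bits
  // 31:30, the opcode into bits 29:26, and two 13-bit operand IDs. Each ID
  // is a base-9 number whose four digits pick the source of each 32-bit
  // element: 0-3 from LHS, 4-7 from RHS, 8 for undef. Hence (1*9+2)*9+3
  // below is the identity copy <0,1,2,3> of LHS.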
  enum {
    OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8,  7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10,  9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1,  2,  3,  4,  5,  6,  7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we want the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand
      // is undefined, in which case we assume we're always picking from the
      // 1st operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which
    // byte in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }
  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will
    // be undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;
  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
        DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }

  if (Subtarget.hasVSX()) {
    if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
      int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);

      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
                                  DAG.getConstant(SplatIdx, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
    }

    // Left shifts of 8 bytes are actually swaps. Convert accordingly.
    if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
      SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
    }
  }

  if (Subtarget.hasQPX()) {
    if (VT.getVectorNumElements() != 4)
      return SDValue();

    if (V2.isUndef()) V2 = V1;

    int AlignIdx = PPC::isQVALIGNIShuffleMask(SVOp);
    if (AlignIdx != -1) {
      return DAG.getNode(PPCISD::QVALIGNI, dl, VT, V1, V2,
                         DAG.getConstant(AlignIdx, dl, MVT::i32));
    } else if (SVOp->isSplat()) {
      int SplatIdx = SVOp->getSplatIndex();
      if (SplatIdx >= 4) {
        std::swap(V1, V2);
        SplatIdx -= 4;
      }

      return DAG.getNode(PPCISD::QVESPLATI, dl, VT, V1,
                         DAG.getConstant(SplatIdx, dl, MVT::i32));
    }

    // Lower this into a qvgpci/qvfperm pair.

    // Compute the qvgpci literal
    unsigned idx = 0;
    for (unsigned i = 0; i < 4; ++i) {
      int m = SVOp->getMaskElt(i);
      unsigned mm = m >= 0 ? (unsigned) m : i;
      idx |= mm << (3-i)*3;
    }

    SDValue V3 = DAG.getNode(PPCISD::QVGPCI, dl, MVT::v4f64,
                             DAG.getConstant(idx, dl, MVT::i32));
    return DAG.getNode(PPCISD::QVFPERM, dl, VT, V1, V2, V3);
  }

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.isUndef()) {
    if (PPC::isSplatShuffleMask(SVOp, 1) ||
        PPC::isSplatShuffleMask(SVOp, 2) ||
        PPC::isSplatShuffleMask(SVOp, 4) ||
        PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
        PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
        PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
        PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
        (Subtarget.hasP8Altivec() && (
         PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
         PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
      PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
      PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
      PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
      (Subtarget.hasP8Altivec() && (
       PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
       PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  ArrayRef<int> PermMask = SVOp->getMask();

  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask[i*4+j] < 0)
        continue;   // Undef, ignore it.

      unsigned ByteSource = PermMask[i*4+j];
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  // For now, we skip this for little endian until such time as we have a
  // little-endian perfect shuffle table.
  if (isFourElementShuffle && !isLittleEndian) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
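    // The four selectors (each 0-8, with 8 meaning undef) act as digits of a
    // base-9 number, indexing one of the 9^4 == 6561 table entries.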
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be
    // computed. For example, if the perm mask can be hoisted out of a loop or
    // is already used (perhaps because there are multiple permutes with the
    // same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
    // permute mask out of the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can
    // be generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison. If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default:
    return false;
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
    CompareOpc = 966;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
    CompareOpc = 198;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequb_p:
    CompareOpc = 6;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequh_p:
    CompareOpc = 70;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequw_p:
    CompareOpc = 134;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 199;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh_p:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew_p:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb_p:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh_p:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw_p:
        CompareOpc = 391;
        break;
      }
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp_p:
    CompareOpc = 454;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
    CompareOpc = 710;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
    CompareOpc = 774;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
    CompareOpc = 838;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
    CompareOpc = 902;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 967;
      isDot = true;
    } else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub_p:
    CompareOpc = 518;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
    CompareOpc = 582;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
    CompareOpc = 646;
    isDot = true;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {
      CompareOpc = 711;
      isDot = true;
    } else
      return false;
    break;

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
        CompareOpc = 99;
        break;
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
        CompareOpc = 115;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
        CompareOpc = 107;
        break;
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
        CompareOpc = 67;
        break;
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
        CompareOpc = 83;
        break;
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:
        CompareOpc = 75;
        break;
      }
      isDot = true;
    } else
      return false;
    break;

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
    CompareOpc = 966;
    break;
  case Intrinsic::ppc_altivec_vcmpeqfp:
    CompareOpc = 198;
    break;
  case Intrinsic::ppc_altivec_vcmpequb:
    CompareOpc = 6;
    break;
  case Intrinsic::ppc_altivec_vcmpequh:
    CompareOpc = 70;
    break;
  case Intrinsic::ppc_altivec_vcmpequw:
    CompareOpc = 134;
    break;
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 199;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
      default:
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
        CompareOpc = 7;
        break;
      case Intrinsic::ppc_altivec_vcmpneh:
        CompareOpc = 71;
        break;
      case Intrinsic::ppc_altivec_vcmpnew:
        CompareOpc = 135;
        break;
      case Intrinsic::ppc_altivec_vcmpnezb:
        CompareOpc = 263;
        break;
      case Intrinsic::ppc_altivec_vcmpnezh:
        CompareOpc = 327;
        break;
      case Intrinsic::ppc_altivec_vcmpnezw:
        CompareOpc = 391;
        break;
      }
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgefp:
    CompareOpc = 454;
    break;
  case Intrinsic::ppc_altivec_vcmpgtfp:
    CompareOpc = 710;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsb:
    CompareOpc = 774;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsh:
    CompareOpc = 838;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsw:
    CompareOpc = 902;
    break;
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 967;
    else
      return false;
    break;
  case Intrinsic::ppc_altivec_vcmpgtub:
    CompareOpc = 518;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuh:
    CompareOpc = 582;
    break;
  case Intrinsic::ppc_altivec_vcmpgtuw:
    CompareOpc = 646;
    break;
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
      CompareOpc = 711;
    else
      return false;
    break;
  }
  return true;
}
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                                  Op.getOperand(ArgStart + 1)),
                                      Op.getOperand(0)),
                   0);
  }
  default:
    break;
  }
  return SDValue();
}
SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
  // Check for a DIV with the same operands as this REM.
  for (auto UI : Op.getOperand(1)->uses()) {
    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
      if (UI->getOperand(0) == Op.getOperand(0) &&
          UI->getOperand(1) == Op.getOperand(1))
        return SDValue();
  }
  return Op;
}
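// Explanatory note: when the quotient is computed anyway, expanding the
// remainder as a - (a / b) * b reuses that divide, which is generally
// cheaper than also issuing a separate hardware modulo.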
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // MTVSRDD
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  // XXBRD
  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
  // MFVSRD
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
    DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                DAG.getConstant(MaskVal, dl, MVT::i32));
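  // (E.g. an i8 cmpxchg masks the compare value with 0xFF and an i16 one
  // with 0xFFFF, matching the zero-extension done by the atomic load.)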
  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  auto NodeTy =
    (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  // Load it out.
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (!C)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
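    // E.g. inserting element 3 of a v8i16 targets byte 6 in big-endian
    // numbering; viewed little-endian that is byte (16 - 2) - 6 == 8.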
    return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return SDValue();
}

SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
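
// Arithmetic check for the FMA trick above: with each lane of V in
// {-1.0, 1.0}, 0.5*V + 0.5 yields 0.0 for "false" and 1.0 for "true",
// which qvfctiwu then converts to the integers 0 and 1 before the word is
// loaded back and (when CR bits are in use) truncated to i1.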

/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}

/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
    DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
    Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
    dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
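
// Summary of the v4i1 store path above: QBFLT materializes the mask as v4f64
// lanes in {-1.0, 1.0}, the FMA remaps those onto {0.0, 1.0}, qvfctiwu
// converts to v4i32 through a 16-byte stack slot, and the four words are then
// truncated to single bytes at the store's destination.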

SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
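
// The v4i32 path above implements, per 32-bit lane (everything mod 2^32):
//   a*b = lo16(a)*lo16(b) + ((hi16(a)*lo16(b) + lo16(a)*hi16(b)) << 16)
// vmulouh yields the first term; vmsumuhm with the halfword-rotated RHS
// yields the parenthesized sum, which vslw then shifts up by 16 bits.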

SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  // For vector abs, it can be lowered to:
  // abs x
  // ==>
  // y = -x
  // smax(x, y)

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once SMAX patch landed
  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
  if (VT == MVT::v2i64)
    BifID = Intrinsic::ppc_altivec_vmaxsd;
  else if (VT == MVT::v8i16)
    BifID = Intrinsic::ppc_altivec_vmaxsh;
  else if (VT == MVT::v16i8)
    BifID = Intrinsic::ppc_altivec_vmaxsb;

  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}
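
// That is, abs(x) == smax(x, 0 - x): for a lane holding -5 the vmaxs* picks
// 5, and for INT_MIN both operands equal INT_MIN, matching the wrapping
// behavior of ISD::ABS.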

/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::ABS:                return LowerABS(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  }
}

void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    EVT TrgVT = N->getValueType(0);
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        N->getOperand(0).getValueType().getSizeInBits() <= 128)
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  }
}
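
// Note (an assumption about the surrounding infrastructure, not encoded
// here): on 32-bit targets, the two i32 halves pushed for READCYCLECOUNTER
// above are recombined into the original i64 result by the common type
// legalizer.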

//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction* callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}

// The mappings for emitLeading/TrailingFence is taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
      return Builder.CreateCall(
          Intrinsic::getDeclaration(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
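
// In table form (a summary of the two hooks above, not an exhaustive
// restatement of the cited mapping):
//   seq_cst  -> leading 'sync';   trailing 'lwsync' (or 'cfence' for
//               64-bit loads) when the instruction has an atomic load
//   release  -> leading 'lwsync'; no trailing fence
//   acquire  -> no leading fence; trailing 'lwsync'/'cfence' as above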

MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
    CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                                   : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  // For max/min...
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bgt exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      unsigned ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(ExtReg);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(dest);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
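
// For instance (a sketch of the expected machine code for a 4-byte atomic
// add, i.e. AtomicSize == 4 and no CmpOpcode):
//   loop: lwarx  rD, rA, rB
//         add    rT, rI, rD
//         stwcx. rT, rA, rB
//         bne-   loop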

MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  unsigned dest = MI.getOperand(0).getReg();
  unsigned ptrA = MI.getOperand(1).getReg();
  unsigned ptrB = MI.getOperand(2).getReg();
  unsigned incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned MaskReg = RegInfo.createVirtualRegister(GPRC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  unsigned Ptr1Reg;
  unsigned TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }

  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
  // mode. Otherwise, there might be subregister-related issues later.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    unsigned SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      unsigned ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
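
// Worked example of the lane bookkeeping above (big-endian, i8 at byte
// offset 1 of its word): rlwinm produces shift1 == 8, the xori with 24 gives
// shift == 16, so the operand is shifted into bits 16..23 -- exactly where
// big-endian byte 1 lives within the word. Little-endian skips the xori,
// leaving shift == 8 for the same byte.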

llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  unsigned DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI.getOperand(1).getReg();

  if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
            .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  //  mainDstReg = 0
  MIB =
      BuildMI(mainMBB, DL,
              TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
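
// Layout of the LLVM-private jmp_buf used above, in pointer-sized slots:
//   slot 0: frame address (stored by Clang before this intrinsic runs)
//   slot 1: jump address / LR  (LabelOffset)
//   slot 2: stack address (stored by Clang; reloaded via SPOffset below)
//   slot 3: TOC pointer, R2    (TOCOffset)
//   slot 4: base pointer       (BPOffset)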

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  unsigned BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
      MI.getOpcode() == TargetOpcode::PATCHPOINT) {
    if (Subtarget.isPPC64() && Subtarget.isSVR4ABI() &&
        MI.getOpcode() == TargetOpcode::PATCHPOINT) {
      // Call lowering should have added an r2 operand to indicate a dependence
      // on the TOC base pointer value. It can't however, because there is no
      // way to mark the dependence as implicit there, and so the stackmap code
      // will confuse it with a regular operand. Instead, add the dependence
      // here.
      MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
    }

    return emitPatchPoint(MI, BB);
  }

  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();

  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
      MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
      MI.getOpcode() == PPC::SELECT_I8) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_F16 ||
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
             MI.getOpcode() == PPC::SELECT_CC_SPE ||
             MI.getOpcode() == PPC::SELECT_I4 ||
             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_F16 ||
             MI.getOpcode() == PPC::SELECT_QFRC ||
             MI.getOpcode() == PPC::SELECT_QSRC ||
             MI.getOpcode() == PPC::SELECT_QBRC ||
             MI.getOpcode() == PPC::SELECT_SPE ||
             MI.getOpcode() == PPC::SELECT_SPE4 ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
             MI.getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
        MI.getOpcode() == PPC::SELECT_F16 ||
        MI.getOpcode() == PPC::SELECT_SPE4 ||
        MI.getOpcode() == PPC::SELECT_SPE ||
        MI.getOpcode() == PPC::SELECT_QFRC ||
        MI.getOpcode() == PPC::SELECT_QSRC ||
        MI.getOpcode() == PPC::SELECT_QBRC ||
        MI.getOpcode() == PPC::SELECT_VRRC ||
        MI.getOpcode() == PPC::SELECT_VSFRC ||
        MI.getOpcode() == PPC::SELECT_VSSRC ||
        MI.getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    // ...
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old'='new'
    // bne readLoop # branch if they're not equal
    // ...

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    unsigned LoReg = MI.getOperand(0).getReg();
    unsigned HiReg = MI.getOperand(1).getReg();

    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    unsigned CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
        .addReg(HiReg)
        .addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CmpReg)
        .addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    unsigned dest = MI.getOperand(0).getReg();
    unsigned ptrA = MI.getOperand(1).getReg();
    unsigned ptrB = MI.getOperand(2).getReg();
    unsigned oldval = MI.getOperand(3).getReg();
    unsigned newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
        .addReg(dest)
        .addReg(oldval);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(newval)
        .addReg(ptrA)
        .addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(dest)
        .addReg(ptrA)
        .addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
||
10712 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16
) {
10713 // We must use 64-bit registers for addresses when targeting 64-bit,
10714 // since we're actually doing arithmetic on them. Other registers
10716 bool is64bit
= Subtarget
.isPPC64();
10717 bool isLittleEndian
= Subtarget
.isLittleEndian();
10718 bool is8bit
= MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
;
10720 unsigned dest
= MI
.getOperand(0).getReg();
10721 unsigned ptrA
= MI
.getOperand(1).getReg();
10722 unsigned ptrB
= MI
.getOperand(2).getReg();
10723 unsigned oldval
= MI
.getOperand(3).getReg();
10724 unsigned newval
= MI
.getOperand(4).getReg();
10725 DebugLoc dl
= MI
.getDebugLoc();
10727 MachineBasicBlock
*loop1MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10728 MachineBasicBlock
*loop2MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10729 MachineBasicBlock
*midMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10730 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10731 F
->insert(It
, loop1MBB
);
10732 F
->insert(It
, loop2MBB
);
10733 F
->insert(It
, midMBB
);
10734 F
->insert(It
, exitMBB
);
10735 exitMBB
->splice(exitMBB
->begin(), BB
,
10736 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10737 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10739 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10740 const TargetRegisterClass
*RC
=
10741 is64bit
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
10742 const TargetRegisterClass
*GPRC
= &PPC::GPRCRegClass
;
10744 unsigned PtrReg
= RegInfo
.createVirtualRegister(RC
);
10745 unsigned Shift1Reg
= RegInfo
.createVirtualRegister(GPRC
);
10746 unsigned ShiftReg
=
10747 isLittleEndian
? Shift1Reg
: RegInfo
.createVirtualRegister(GPRC
);
10748 unsigned NewVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10749 unsigned NewVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10750 unsigned OldVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10751 unsigned OldVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10752 unsigned MaskReg
= RegInfo
.createVirtualRegister(GPRC
);
10753 unsigned Mask2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10754 unsigned Mask3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10755 unsigned Tmp2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10756 unsigned Tmp4Reg
= RegInfo
.createVirtualRegister(GPRC
);
10757 unsigned TmpDestReg
= RegInfo
.createVirtualRegister(GPRC
);
10759 unsigned TmpReg
= RegInfo
.createVirtualRegister(GPRC
);
10760 unsigned ZeroReg
= is64bit
? PPC::ZERO8
: PPC::ZERO
;
10763 // fallthrough --> loopMBB
10764 BB
->addSuccessor(loop1MBB
);
10766 // The 4-byte load must be aligned, while a char or short may be
10767 // anywhere in the word. Hence all this nasty bookkeeping code.
10768 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10769 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10770 // xori shift, shift1, 24 [16]
10771 // rlwinm ptr, ptr1, 0, 0, 29
10772 // slw newval2, newval, shift
10773 // slw oldval2, oldval,shift
10774 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10775 // slw mask, mask2, shift
10776 // and newval3, newval2, mask
10777 // and oldval3, oldval2, mask
10779 // lwarx tmpDest, ptr
10780 // and tmp, tmpDest, mask
10781 // cmpw tmp, oldval3
10784 // andc tmp2, tmpDest, mask
10785 // or tmp4, tmp2, newval3
10786 // stwcx. tmp4, ptr
10790 // stwcx. tmpDest, ptr
10792 // srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA)
          .addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }

    // We need to use a 32-bit subregister here to avoid a register class
    // mismatch in 64-bit mode.
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
        .addImm(3)
        .addImm(27)
        .addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
          .addReg(Shift1Reg)
          .addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(0)
          .addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval)
        .addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
          .addReg(Mask3Reg)
          .addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg)
        .addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg)
        .addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg)
        .addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(Tmp4Reg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);
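    // midMBB is the failure path: store the unchanged value back so the
    // reservation created by the lwarx is released.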
    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
        .addReg(TmpDestReg)
        .addReg(ShiftReg);
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI.getOperand(0).getReg();
    unsigned Src1 = MI.getOperand(1).getReg();
    unsigned Src2 = MI.getOperand(2).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);
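    // FPSCR bits 30-31 form the RN (rounding control) field; setting bit 31
    // and clearing bit 30 selects RN = 0b01, round toward zero.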
    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
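    // ANDIo/ANDIo8 is the record form of andi.: it ANDs the input with 1
    // and sets CR0, whose EQ or GT bit is then copied out as the result.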
    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
                          ? PPC::ANDIo8
                          : PPC::ANDIo;
    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned Dest = RegInfo.createVirtualRegister(
        Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

    DebugLoc dl = MI.getDebugLoc();
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    return BB;
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}
//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // For the estimates, convergence is quadratic, so we essentially double the
  // number of digits correct after every iteration. For both FRE and FRSQRTE,
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
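  // Starting from 2^-5, three Newton-Raphson steps give 5 -> 10 -> 20 -> 40
  // correct bits (enough for f32's 23); from 2^-14, one step gives 28 bits.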
  if (VT.getScalarType() == MVT::f64)
    RefinementSteps++;
  return RefinementSteps;
}
SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    UseOneConstNR = true;
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
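  // E.g. with a threshold of three, (a/d, b/d, c/d) becomes
  // (t = 1.0/d; a*t, b*t, c*t): one divide (or estimate sequence) plus
  // three multiplies instead of three divides.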
  switch (Subtarget.getDarwinDirective()) {
  default:
    return 3;
  case PPC::DIR_440:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}
// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t &Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}
// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}
// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look
  // through all loads (just the chain uses) and token factors to find a
  // consecutive load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
              cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}
/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of comparison is kept in a GPR instead of CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen
/// process.
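/// For example, with Size == 64 an unsigned "a < b" on zero-extended
/// operands becomes lsb(srl(sub(a, b), 63)): the subtraction is negative
/// exactly when a < b, so the shifted-down sign bit is the comparison
/// result.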
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction.
  for (SDNode::use_iterator UI = N->use_begin(),
       UE = N->use_end(); UI != UE; ++UI) {
    if (UI->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  }

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (OpSize < Size) {
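    // Note the flag pairs passed to generateEquivalentSub below: "a <= b"
    // and "a >= b" are computed as the complement of the swapped or
    // unswapped strict comparison, respectively.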
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.
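  // For example, (trunc (and (zext i1 a), (zext i1 b))) can be computed
  // entirely in CR bits as (and a, b); the rewrite below does exactly that.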
  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default:             C = 0; break;
    case ISD::SELECT:    C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops[C+i].getValueType() == N->getValueType(0))
        continue;

      if (N->getOpcode() == ISD::SIGN_EXTEND)
        Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else if (N->getOpcode() == ISD::ZERO_EXTEND)
        Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
      else
        Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
    }

    // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
    // truncate them again to the original value type.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
      if (SI0 != SelectTruncOp[0].end())
        Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
      auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
      if (SI1 != SelectTruncOp[1].end())
        Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
    }

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
  }

  // Now we're left with the initial extension itself.
  if (!ReallyNeedsExt)
    return N->getOperand(0);

  // To zero extend, just mask off everything except for the first bit (in the
  // i1 case).
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
                       DAG.getConstant(APInt::getLowBitsSet(
                                         N->getValueSizeInBits(0), PromBits),
                                       dl, N->getValueType(0)));
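  // Otherwise, sign extend with a shift pair: shifting left by
  // (width - PromBits) moves the promoted value's sign bit into the MSB,
  // and the arithmetic shift right then replicates it across the high bits.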
  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
         "Invalid extension type");
  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
  SDValue ShiftCst =
    DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
  return DAG.getNode(
      ISD::SRA, dl, N->getValueType(0),
      DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
      ShiftCst);
}
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
        LHS.hasOneUse())
      std::swap(LHS, RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
      return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
    }
  }

  return DAGCombineTruncBoolExt(N, DCI);
}
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
    return LD->getExtensionType() == ISD::EXTLOAD &&
           Op.getValueType() == MVT::f64;
  return false;
}
/// Reduces the number of fp-to-int conversions when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();
      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);
      if (Is32Bit) {
        // For 32-bit values, we need to add an FP_ROUND node (if we made it
        // here, we know that all inputs are extending loads so this is safe).
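        // The constant flag of 1 on the FP_ROUND below marks the truncation
        // as exact (no bits are lost), which holds here because each input
        // value came from an f32 extending load.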
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;

  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
      N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
      InputsAreConsecutiveLoads = false;
    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
      InputsAreReverseConsecutive = false;

    // Exit early if the loads are neither consecutive nor reverse consecutive.
    if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
      return SDValue();
  }

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue FirstLoadOp =
    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
  SDValue LastLoadOp =
    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
                       N->getOperand(N->getNumOperands()-1);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
  if (InputsAreConsecutiveLoads) {
    assert(LD1 && "Input needs to be a LoadSDNode.");
    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
                       LD1->getBasePtr(), LD1->getPointerInfo(),
                       LD1->getAlignment());
  }
  if (InputsAreReverseConsecutive) {
    assert(LDL && "Input needs to be a LoadSDNode.");
    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
                               LDL->getBasePtr(), LDL->getPointerInfo(),
                               LDL->getAlignment());
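    // The elements are consecutive in reverse order, so the vector was
    // loaded from the address of the last operand; a shuffle mask of
    // <N-1, ..., 1, 0> puts the elements back in build_vector order.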
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
                                DAG.getUNDEF(N->getValueType(0)), Ops);
  }
  return SDValue();
}
// This function adds the required vector_shuffle needed to get
// the elements of the vector extract in the correct position
// as specified by the CorrectElems encoding.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
                                      SDValue Input, uint64_t Elems,
                                      uint64_t CorrectElems) {
  SDLoc dl(N);

  unsigned NumElems = Input.getValueType().getVectorNumElements();
  SmallVector<int, 16> ShuffleMask(NumElems, -1);

  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at element indices required for the instruction.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
    DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                         DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT Ty = N->getValueType(0);
  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
  return BV;
}
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a new
// PPCISD::SExtVElems node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  //   For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  //   For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  //   For LE: the allowed indices are: 0x0,0x8
  //   For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
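  // InputSize + OutputSize identifies the conversion uniquely:
  // 8+32=40 (b->w), 8+64=72 (b->d), 16+32=48 (h->w), 16+64=80 (h->d),
  // 32+64=96 (w->d), matching the TargetElems entries above.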
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getValueType() != MVT::i32 ||
      Ext2.getValueType() != MVT::i32)
    return SDValue();
  if (Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR.
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float to an int and back to a float again,
  // then we don't need the store/load pair at all.
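  // E.g. (f64)(i64)x becomes an fctidz/fcfid pair operating directly on
  // the floating-point register.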
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }
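  // For example, on a little-endian pre-P9 VSX target:
  //   (v4i32 load p)  ->  (v4i32 bitcast (XXSWAPD (LXVD2X p)))
  // with the swap's chain result standing in for the original load's chain.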
  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    MMO = ST->getMemOperand();
    SrcOpnd = 1;
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    MMO = Intrin->getMemOperand();
    SrcOpnd = 2;
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
  // aligned and the type is a vector with elements up to 4 bytes.
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment() % 16) &&
      VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }
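  // Mirror of the load case above: swap the doublewords in the register,
  // then store with the doubleword-permuting store, e.g.:
  //   (store v4i32:val, p)  ->  (STXVD2X (XXSWAPD (bitcast v2f64 val)), p)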
  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
// Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  // Floating point types smaller than 32 bits are not legal on Power.
  if (ResVT.getScalarSizeInBits() < 32)
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64.
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };
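  // Emit the store as a target memory intrinsic so the converted value is
  // stored directly from the VSR (e.g. an stxsiwx-style store), without a
  // round trip through a GPR.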
  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
                                DAG.getVTList(MVT::Other), Ops,
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
    return combineADD(N, DCI);
  case ISD::SHL:
    return combineSHL(N, DCI);
  case ISD::SRA:
    return combineSRA(N, DCI);
  case ISD::SRL:
    return combineSRL(N, DCI);
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  case ISD::SETCC:
    if (SDValue CSCC = combineSetCC(N, DCI))
      return CSCC;
    LLVM_FALLTHROUGH;
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::STORE: {
    EVT Op1VT = N->getOperand(1).getValueType();
    unsigned Opcode = N->getOperand(1).getOpcode();

    if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
      SDValue Val = combineStoreFPToInt(N, DCI);
      if (Val)
        return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
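    // For example, (store (bswap i32:x), p) becomes a single stwbrx of x to p,
    // eliminating the explicit byte-reversal computation.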
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

      // STBRX can only handle simple types and it makes no sense to store
      // less than two bytes in byte-reversed order.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
        break;

      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted to the right side before STBRX.
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
    // So it can increase the chance of CSE constant construction.
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
                                    MemVT.getSizeInBits());
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

      // DAG.getTruncStore() can't be used here because it doesn't accept
      // the general (base + offset) addressing mode.
      // So we use UpdateNodeOperands and setTruncatingStore instead.
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
                             N->getOperand(3));
      cast<StoreSDNode>(N)->setTruncatingStore(true);
      return SDValue(N, 0);
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Op1VT.isSimple()) {
      MVT StoreVT = Op1VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way we
    // canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      // We're looking for a sequence like this:
      // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //     t16: i64 = srl t13, Constant:i32<32>
      //   t17: i32 = truncate t16
      // t18: f32 = bitcast t17
      //   t19: i32 = truncate t13
      // t20: f32 = bitcast t19
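      // and we rewrite it as two adjacent f32 loads:
      //   t21: f32,ch = load t0, t6
      //   t22: f32,ch = load t21:1, (add t6, 4)
      // (memory-layout order; the two bitcast users swap roles on LE).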
      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                      LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlignment(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                    BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };

    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);
    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.
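      // Concretely, a single unaligned Altivec load expands to:
      //   PermCntl  = lvsl(ptr)         (lvsr when little endian)
      //   BaseLoad  = lvx(ptr)
      //   ExtraLoad = lvx(ptr + 15)     (+16 if a consecutive load was found)
      //   Result    = vperm(BaseLoad, ExtraLoad, PermCntl)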
      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(long)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
        DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment =
        DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                             DAG.getTargetConstant(1, dl, MVT::i64));
                             // second argument is 1 because this rounding
                             // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // our new chain.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    }
    break;
  case ISD::INTRINSIC_WO_CHAIN: {
    bool isLittleEndian = Subtarget.isLittleEndian();
    unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
                                         : Intrinsic::ppc_altivec_lvsl);
    if ((IID == Intr ||
         IID == Intrinsic::ppc_qpx_qvlpcld ||
         IID == Intrinsic::ppc_qpx_qvlpcls) &&
        N->getOperand(1)->getOpcode() == ISD::ADD) {
      SDValue Add = N->getOperand(1);

      int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
                 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;

      if (DAG.MaskedValueIsZero(Add->getOperand(1),
                                APInt::getAllOnesValue(Bits /* alignment */)
                                    .zext(Add.getScalarValueSizeInBits()))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
                                  UE = BasePtr->use_end();
             UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
              cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == IID) {
            // We've found another LVSL/LVSR, and this address is an aligned
            // multiple of that one. The results will be the same, so use the
            // one we've just found instead.
            return SDValue(*UI, 0);
          }
        }
      }

      if (isa<ConstantSDNode>(Add->getOperand(1))) {
        SDNode *BasePtr = Add->getOperand(0).getNode();
        for (SDNode::use_iterator UI = BasePtr->use_begin(),
             UE = BasePtr->use_end(); UI != UE; ++UI) {
          if (UI->getOpcode() == ISD::ADD &&
              isa<ConstantSDNode>(UI->getOperand(1)) &&
              (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
               cast<ConstantSDNode>(UI->getOperand(1))->getZExtValue()) %
              (1ULL << Bits) == 0) {
            SDNode *OtherAdd = *UI;
            for (SDNode::use_iterator VI = OtherAdd->use_begin(),
                 VE = OtherAdd->use_end(); VI != VE; ++VI) {
              if (VI->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
                  cast<ConstantSDNode>(VI->getOperand(0))->getZExtValue() ==
                    IID) {
                return SDValue(*VI, 0);
              }
            }
          }
        }
      }
    }

    // Combine vmaxsw/h/b(a, a's negation) to abs(a)
    // Expose the vabsduw/h/b opportunity for downstream combines.
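    // For example:
    //   vmaxsw(a, sub(0, a)) -> abs(a)
    // so that a later combine can form vabsduw/h/b from abs(sub(x, y))
    // patterns.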
    if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
        (IID == Intrinsic::ppc_altivec_vmaxsw ||
         IID == Intrinsic::ppc_altivec_vmaxsh ||
         IID == Intrinsic::ppc_altivec_vmaxsb)) {
      SDValue V1 = N->getOperand(1);
      SDValue V2 = N->getOperand(2);
      if ((V1.getSimpleValueType() == MVT::v4i32 ||
           V1.getSimpleValueType() == MVT::v8i16 ||
           V1.getSimpleValueType() == MVT::v16i8) &&
          V1.getSimpleValueType() == V2.getSimpleValueType()) {
        // (0-a, a)
        if (V1.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V1.getOperand(0).getNode()) &&
            V1.getOperand(1) == V2) {
          return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
        }
        // (a, 0-a)
        if (V2.getOpcode() == ISD::SUB &&
            ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
            V2.getOperand(1) == V1) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
        // (x-y, y-x)
        if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
            V1.getOperand(0) == V2.getOperand(1) &&
            V1.getOperand(1) == V2.getOperand(0)) {
          return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
        }
      }
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN:
    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_lxvw4x:
      case Intrinsic::ppc_vsx_lxvd2x:
        return expandVSXLoadForLE(N, DCI);
      }
    }
    break;
  case ISD::INTRINSIC_VOID:
    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Subtarget.needsSwapsForVSXMemOps()) {
      switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
      default:
        break;
      case Intrinsic::ppc_vsx_stxvw4x:
      case Intrinsic::ppc_vsx_stxvd2x:
        return expandVSXStoreForLE(N, DCI);
      }
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away. This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a
      // real chain result. The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }
    break;
  case PPCISD::VCMPo: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = nullptr;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value. If it has a
      // chain, this transformation is more complex. Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = nullptr;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == nullptr; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFOCRF instruction, we know this is safe.
      // Otherwise we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFOCRF)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BRCOND: {
    SDValue Cond = N->getOperand(1);
    SDValue Target = N->getOperand(2);

    if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero) {

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
      assert(Cond.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
                         N->getOperand(0), Target);
    }
  }
  break;
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value. If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !isNullConstant(LHS.getOperand(1)))
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::ppc_is_decremented_ctr_nonzero &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  case ISD::ABS:
    return combineABS(N, DCI);
  case ISD::VSELECT:
    return combineVSelect(N, DCI);
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);
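  // On PPC this lowers to a shift-right-algebraic plus add-with-carry
  // (srawi/sradi followed by addze): the addze corrects the floor-rounding
  // of the shift so negative dividends round toward zero, as C requires.
  // SRA_ADDZE models that instruction pair.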
  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}
unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32)
          break;
      }

    // The value returned here is a log2 alignment: returning 5 requests
    // 2^5 = 32-byte alignment, matching the goal described above.
    if (LoopSize > 16 && LoopSize <= 32)
      return 5;

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // can hold 64-bit integer data.

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}
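// Map inline-asm register constraints onto PPC register classes. For example,
//   asm("add %0,%1,%2" : "=r"(d) : "r"(a), "r"(b));
// resolves each 'r' below to GPRCRegClass (G8RCRegClass for i64 on PPC64).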
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::SPE4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QFRCRegClass);
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QSRCRegClass);
      }
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if (Constraint == "ws" && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register class.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                          PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
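/// For example, asm("addi %0,%1,%2" : "=r"(d) : "r"(s), "I"(n)) reaches this
/// with constraint 'I' and requires n to be a signed 16-bit constant.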
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue> &Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              SelectionDAG &DAG) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool isDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  unsigned Reg = StringSwitch<unsigned>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (isDarwinABI || isPPC64) ? 0 : PPC::R2)
                   .Case("r13", (!isPPC64 && isDarwinABI) ? 0 :
                                (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(0);

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // The 32-bit SVR4 ABI accesses everything as got-indirect.
  if (Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    return true;

  CodeModel::Model CModel = getTargetMachine().getCodeModel();
  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got. The difference
  // is that for large code model we have ADDISTocHa + LDtocL and for
  // small code model we simply have LDtoc.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) {
    const GlobalValue *GV = G->getGlobal();
    unsigned char GVFlags = Subtarget.classifyGlobalReference(GV);
    // The NLP flag indicates that a global access has to use an
    // extra indirection.
    if (GVFlags & PPCII::MO_NLP_FLAG)
      return true;
  }

  return false;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}
bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }
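    // Most of these loads truncate the address to an aligned boundary
    // (lvx-style), so the footprint reported below is the same conservative
    // window used by the unaligned vector load expansion above: from
    // VT.getStoreSize()-1 bytes below the pointer to the same distance above.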
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = 1;
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = 1;
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  return false;
}
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe that the destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against the alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    const Function &F = MF.getFunction();
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !F.hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}
14093 /// to just the constant itself.
14094 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt
&Imm
,
14096 assert(Ty
->isIntegerTy());
14098 unsigned BitSize
= Ty
->getPrimitiveSizeInBits();
14099 return !(BitSize
== 0 || BitSize
> 64);
bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}
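// Both isTruncateFree overloads encode the same fact: truncating i64 to i32
// is free on PPC64 because 32-bit operations simply read the low half of the
// 64-bit GPR; no instruction is needed to discard the high bits.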
bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can
  // be folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}
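// Example (illustrative): an i8 load is emitted as lbz, which already clears
// the high bits of the destination register, so a following zext to i32/i64
// costs nothing. NON_EXTLOAD is included because narrow integer loads on PPC
// are zero-extending by default.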
bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  if (DestVT == MVT::f128)
    return false;
  return true;
}
bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}
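// Both hooks accept anything that fits a signed or unsigned 16-bit field,
// matching the D-form immediate variants (e.g. cmpwi/cmpdi vs. cmplwi/cmpldi).
// Example: 40000 is legal (fits as unsigned 16-bit), while -40000 is not and
// would have to be materialized in a register first.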
bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}
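// In practice this means misaligned i32/i64/f64 accesses stay as single
// loads/stores (at worst slower across a page boundary), while misaligned
// vector accesses are only permitted for the VSX-supported 16-byte types
// listed above.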
bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return (EnableQuadPrecision && Subtarget.hasP9Vector());
  default:
    break;
  }

  return false;
}
const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}
unsigned PPCTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

unsigned PPCTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}
bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
    EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}
// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}
void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo.
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}
void PPCTargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    unsigned NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();
  return true;
}
// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}
bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
    return Imm.isPosZero();
  }
}
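// With VSX, +0.0 is the one immediate that never needs a constant-pool load:
// it can be generated in-register (e.g. by xoring a VSX register with
// itself), which is why only Imm.isPosZero() is accepted here.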
// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}
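// Worked example (illustrative): for x << (y % 32) on v4i32, the IR-level
// masking produces (shl x, (and y, splat(31))). The PPC vector shifts (e.g.
// vslw) already interpret each shift amount modulo the element width, so the
// AND is redundant and the whole node collapses to PPCISD::SHL x, y.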
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift amount
  // could be an i64.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}
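// Worked example (illustrative): on an ISA 3.0 target,
//   (shl (sign_extend i32:x to i64), 3)
// becomes PPCISD::EXTSWSLI x, 3, i.e. the single extswsli instruction
// (extend sign word and shift left immediate) rather than an extsw followed
// by a separate 64-bit shift.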
SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z.
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch (cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default:
    break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL,
                               DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL,
                               DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS,
                       DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}
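// Worked example (illustrative): for X + (Z != 5) with i64 values,
// NegConstant is -5 and the SETNE arm emits roughly
//   addi  t, Z, -5    # t == 0 iff Z == 5
//   addic t, t, -1    # carry is set iff t != 0
//   addze r, X        # r = X + carry = X + (Z != 5)
// which avoids materializing the boolean with a compare-and-select sequence.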
SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}
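// Worked example (illustrative): on little-endian targets,
//   (trunc (srl (bitcast f128:v to i128), 64) to i64)
// asks for the high doubleword of the f128. The combine rewrites it to
//   (extract_vector_elt (bitcast v to v2i64), 1)
// so the value is pulled directly from the VSX register instead of being
// spilled as 16 bytes and partially reloaded.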
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64-bit SysV ABIs.
  if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail-calls aren't guaranteed
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // Make sure the callee and caller calling conventions are eligible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
    return false;

  // If the function is local then we have a good chance at tail-calling it.
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}
bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
    return true;
  return VT == MVT::f32 || VT == MVT::f64 ||
         VT == MVT::v4f32 || VT == MVT::v2f64;
}
bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis. we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64-bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
  return true;
}
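// Examples: a mask of 0xFFFF fits the andi. immediate and 0xFFFF0000 fits
// andis., so either and-plus-compare-against-zero folds into one record-form
// instruction. 0x10001 fits neither field and is rejected.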
// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b)) to (vabsd a b 1) if a & b are of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, the unsigned absolute-difference operation
    // gives the right result here, because the subtraction result is known
    // to be non-negative (as a signed integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw.
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
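// Worked example (illustrative): if both operands of a v4i32 subtraction are
// zero-extended from halfwords, every element is known non-negative, so
// abs(a - b) equals the unsigned absolute difference and VABSD with a third
// operand of 0 selects directly to vabsduw. The second pattern instead
// passes 1, recording that the v4i32 operands must first be biased with
// xvnegsp before vabsduw produces the absolute difference.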
// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8.
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation.
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here.
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));