//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCCCState.h"
#include "PPCCallingConv.h"
#include "PPCFrameLowering.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCRegisterInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "ppc-lowering"

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableSCO("disable-ppc-sco",
cl::desc("disable sibling call optimization on ppc"), cl::Hidden);

static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);

static cl::opt<bool> EnableQuadPrecision("enable-ppc-quad-precision",
cl::desc("enable quad precision float support on ppc"), cl::Hidden);

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumSiblingCalls, "Number of sibling calls");

static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);

static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);

// FIXME: Remove this once the bug has been fixed!
extern cl::opt<bool> ANDIGlueBug;

PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
                                     const PPCSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {
  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget.isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  if (!useSoftFloat()) {
    if (Subtarget.hasSPE()) {
      addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
      addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
    } else {
      addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
      addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
    }
  }

  // Match BITREVERSE to customized fast code sequence in the td file.
  setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
  setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);

  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
  }

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);
  if (!Subtarget.hasSPE()) {
    setIndexedLoadAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedLoadAction(ISD::PRE_INC, MVT::f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::f64, Legal);
  }

  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Legal);
    setOperationAction(ISD::ADDE, VT, Legal);
    setOperationAction(ISD::SUBC, VT, Legal);
    setOperationAction(ISD::SUBE, VT, Legal);
  }

  if (Subtarget.useCRBits()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

    if (isPPC64 || Subtarget.hasFPCVT()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::SINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote);
      AddPromotedToType(ISD::UINT_TO_FP, MVT::i1,
                        isPPC64 ? MVT::i64 : MVT::i32);
    } else {
      setOperationAction(ISD::SINT_TO_FP, MVT::i1, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i1, Custom);
    }

    // PowerPC does not support direct load/store of condition registers.
    setOperationAction(ISD::LOAD, MVT::i1, Custom);
    setOperationAction(ISD::STORE, MVT::i1, Custom);

    // FIXME: Remove this once the ANDI glue bug is fixed:
    if (ANDIGlueBug)
      setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);

    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
      setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
      setTruncStoreAction(VT, MVT::i1, Expand);
    }

    addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
  }

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions unless we are on P9.
  // On P9 we may use a hardware instruction to compute the remainder.
  // The instructions are not legalized directly because in the cases where the
  // result of both the remainder and the division is required it is more
  // efficient to compute the remainder from the result of the division rather
  // than use the remainder instruction.
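  // (When both the quotient and the remainder are needed, the remainder can
  // be recomputed as a - (a / b) * b, reusing the divide result.)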
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::SREM, MVT::i32, Custom);
    setOperationAction(ISD::UREM, MVT::i32, Custom);
    setOperationAction(ISD::SREM, MVT::i64, Custom);
    setOperationAction(ISD::UREM, MVT::i64, Custom);
  } else {
    setOperationAction(ISD::SREM, MVT::i32, Expand);
    setOperationAction(ISD::UREM, MVT::i32, Expand);
    setOperationAction(ISD::SREM, MVT::i64, Expand);
    setOperationAction(ISD::UREM, MVT::i64, Expand);
  }

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  if (Subtarget.hasSPE()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  } else {
    setOperationAction(ISD::FMA, MVT::f64, Legal);
    setOperationAction(ISD::FMA, MVT::f32, Legal);
  }

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root
  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
        Subtarget.hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget.hasFSQRT() &&
      !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
        Subtarget.hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget.hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget.hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
  // to speed up scalar BSWAP64.
  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  if (Subtarget.hasP9Vector())
    setOperationAction(ISD::BSWAP, MVT::i64, Custom);
  else
    setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  if (Subtarget.isISA3_0()) {
    setOperationAction(ISD::CTTZ, MVT::i32, Legal);
    setOperationAction(ISD::CTTZ, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTTZ, MVT::i32, Expand);
    setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  }

  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  if (!Subtarget.useCRBits()) {
    // PowerPC does not have Select
    setOperationAction(ISD::SELECT, MVT::i32, Expand);
    setOperationAction(ISD::SELECT, MVT::i64, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Expand);
  }

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  if (!Subtarget.useCRBits())
    setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  if (Subtarget.hasSPE()) {
    // SPE has built-in conversions
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  } else {
    // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

    // PowerPC does not have [U|S]INT_TO_FP
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
  }

  if (Subtarget.hasDirectMove() && isPPC64) {
    setOperationAction(ISD::BITCAST, MVT::f32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i32, Legal);
    setOperationAction(ISD::BITCAST, MVT::i64, Legal);
    setOperationAction(ISD::BITCAST, MVT::f64, Legal);
  } else {
    setOperationAction(ISD::BITCAST, MVT::f32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i32, Expand);
    setOperationAction(ISD::BITCAST, MVT::i64, Expand);
    setOperationAction(ISD::BITCAST, MVT::f64, Expand);
  }

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, and so on. As a result, no
  // other SjLj exception interfaces are implemented; please don't build your
  // own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget.is64BitELFABI()) {
    // VAARG always uses double-word chunks, so promote anything smaller.
    setOperationAction(ISD::VAARG, MVT::i1, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i8, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i16, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::i32, Promote);
    AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
    setOperationAction(ISD::VAARG, MVT::Other, Expand);
  } else if (Subtarget.is32BitELFABI()) {
    // VAARG is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
    setOperationAction(ISD::VAARG, MVT::i64, Custom);
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
  if (Subtarget.is32BitELFABI())
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i32, Custom);
  setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, MVT::i64, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i32, Custom);
  setOperationAction(ISD::EH_DWARF_CFA, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  setOperationAction(ISD::INTRINSIC_VOID, MVT::i8, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i16, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::i32, Custom);
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
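  // (e.g. SETUEQ means "unordered or equal", which has to test both the UN
  // and EQ condition-register bits.)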
  if (Subtarget.hasSPE()) {
    setCondCodeAction(ISD::SETO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::f64, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
    setCondCodeAction(ISD::SETUO, MVT::f64, Expand);
  }
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget.has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (Subtarget.hasLFIWAX() || Subtarget.isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    if (Subtarget.hasSPE())
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
    else
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (Subtarget.hasFPCVT()) {
    if (Subtarget.has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget.use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget.hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // For v2i64, these are only valid with P8Vector. This is corrected after
      // the loop.
      if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
        setOperationAction(ISD::SMAX, VT, Legal);
        setOperationAction(ISD::SMIN, VT, Legal);
        setOperationAction(ISD::UMAX, VT, Legal);
        setOperationAction(ISD::UMIN, VT, Legal);
      } else {
        setOperationAction(ISD::SMAX, VT, Expand);
        setOperationAction(ISD::SMIN, VT, Expand);
        setOperationAction(ISD::UMAX, VT, Expand);
        setOperationAction(ISD::UMIN, VT, Expand);
      }

      if (Subtarget.hasVSX()) {
        setOperationAction(ISD::FMAXNUM, VT, Legal);
        setOperationAction(ISD::FMINNUM, VT, Legal);
      }

      // Vector instructions introduced in P8
      if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
        setOperationAction(ISD::CTPOP, VT, Legal);
        setOperationAction(ISD::CTLZ, VT, Legal);
      } else {
        setOperationAction(ISD::CTPOP, VT, Expand);
        setOperationAction(ISD::CTLZ, VT, Expand);
      }

      // Vector instructions introduced in P9
      if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
        setOperationAction(ISD::CTTZ, VT, Legal);
      else
        setOperationAction(ISD::CTTZ, VT, Expand);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::VSELECT, VT, Legal);
      setOperationAction(ISD::SELECT_CC, VT, Promote);
      AddPromotedToType(ISD::SELECT_CC, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::MULHU, VT, Expand);
      setOperationAction(ISD::MULHS, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::BSWAP, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
        setTruncStoreAction(VT, InnerVT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
      }
    }
    if (!Subtarget.hasP8Vector()) {
      setOperationAction(ISD::SMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::SMIN, MVT::v2i64, Expand);
      setOperationAction(ISD::UMAX, MVT::v2i64, Expand);
      setOperationAction(ISD::UMIN, MVT::v2i64, Expand);
    }

    for (auto VT : {MVT::v2i64, MVT::v4i32, MVT::v8i16, MVT::v16i8})
      setOperationAction(ISD::ABS, VT, Custom);

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Vector truncates to sub-word integer that fit in an Altivec/VSX register
    // are cheap, so handle them before they get expanded to scalar.
    setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
    setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32,
                       Subtarget.useCRBits() ? Legal : Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    // Without hasP8Altivec set, v2i64 SMAX isn't available.
    // But ABS custom lowering requires SMAX support.
    if (!Subtarget.hasP8Altivec())
      setOperationAction(ISD::ABS, MVT::v2i64, Expand);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    if (Subtarget.hasP8Altivec())
      setOperationAction(ISD::MUL, MVT::v4i32, Legal);
    else
      setOperationAction(ISD::MUL, MVT::v4i32, Custom);

    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);

    if (Subtarget.hasVSX()) {
      setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);
      if (Subtarget.hasP8Vector()) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
      }
      if (Subtarget.hasDirectMove() && isPPC64) {
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v16i8, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v8i16, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Legal);
        setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2i64, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i8, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i16, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Legal);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal);
      }

      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal);

      setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
      setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
      setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
      setOperationAction(ISD::FROUND, MVT::v2f64, Legal);

      setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

      setOperationAction(ISD::MUL, MVT::v2f64, Legal);
      setOperationAction(ISD::FMA, MVT::v2f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);

      // Share the Altivec comparison restrictions.
      setCondCodeAction(ISD::SETUO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETUEQ, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETO, MVT::v2f64, Expand);
      setCondCodeAction(ISD::SETONE, MVT::v2f64, Expand);

      setOperationAction(ISD::LOAD, MVT::v2f64, Legal);
      setOperationAction(ISD::STORE, MVT::v2f64, Legal);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Legal);

      if (Subtarget.hasP8Vector())
        addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);

      addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);

      addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
      addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);

      if (Subtarget.hasP8Altivec()) {
        setOperationAction(ISD::SHL, MVT::v2i64, Legal);
        setOperationAction(ISD::SRA, MVT::v2i64, Legal);
        setOperationAction(ISD::SRL, MVT::v2i64, Legal);

        // 128 bit shifts can be accomplished via 3 instructions for SHL and
        // SRL, but not for SRA because of the instructions available:
        // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
        // doing it here.
        setOperationAction(ISD::SHL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRL, MVT::v1i128, Expand);
        setOperationAction(ISD::SRA, MVT::v1i128, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Legal);
      } else {
        setOperationAction(ISD::SHL, MVT::v2i64, Expand);
        setOperationAction(ISD::SRA, MVT::v2i64, Expand);
        setOperationAction(ISD::SRL, MVT::v2i64, Expand);

        setOperationAction(ISD::SETCC, MVT::v2i64, Custom);

        // VSX v2i64 only supports non-arithmetic operations.
        setOperationAction(ISD::ADD, MVT::v2i64, Expand);
        setOperationAction(ISD::SUB, MVT::v2i64, Expand);
      }

      setOperationAction(ISD::LOAD, MVT::v2i64, Promote);
      AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v2f64);
      setOperationAction(ISD::STORE, MVT::v2i64, Promote);
      AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v2f64);

      setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Legal);

      setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
      setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);

      // Custom handling for partial vectors of integers converted to
      // floating point. We already have optimal handling for v2i32 through
      // the DAG combine, so those aren't necessary.
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i8, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v2i16, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);

      setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
      setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
      setOperationAction(ISD::FABS, MVT::v4f32, Legal);
      setOperationAction(ISD::FABS, MVT::v2f64, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
      setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Legal);

      if (Subtarget.hasDirectMove())
        setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
      setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);

      addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
    }

    if (Subtarget.hasP8Altivec()) {
      addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
      addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
    }

    if (Subtarget.hasP9Vector()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

      // 128 bit shifts can be accomplished via 3 instructions for SHL and
      // SRL, but not for SRA because of the instructions available:
      // VS{RL} and VS{RL}O.
      setOperationAction(ISD::SHL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRL, MVT::v1i128, Legal);
      setOperationAction(ISD::SRA, MVT::v1i128, Expand);

      if (EnableQuadPrecision) {
        addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
        setOperationAction(ISD::FADD, MVT::f128, Legal);
        setOperationAction(ISD::FSUB, MVT::f128, Legal);
        setOperationAction(ISD::FDIV, MVT::f128, Legal);
        setOperationAction(ISD::FMUL, MVT::f128, Legal);
        setOperationAction(ISD::FP_EXTEND, MVT::f128, Legal);
        // No extending loads to f128 on PPC.
        for (MVT FPT : MVT::fp_valuetypes())
          setLoadExtAction(ISD::EXTLOAD, MVT::f128, FPT, Expand);
        setOperationAction(ISD::FMA, MVT::f128, Legal);
        setCondCodeAction(ISD::SETULT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUGT, MVT::f128, Expand);
        setCondCodeAction(ISD::SETUEQ, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOGE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETOLE, MVT::f128, Expand);
        setCondCodeAction(ISD::SETONE, MVT::f128, Expand);

        setOperationAction(ISD::FTRUNC, MVT::f128, Legal);
        setOperationAction(ISD::FRINT, MVT::f128, Legal);
        setOperationAction(ISD::FFLOOR, MVT::f128, Legal);
        setOperationAction(ISD::FCEIL, MVT::f128, Legal);
        setOperationAction(ISD::FNEARBYINT, MVT::f128, Legal);
        setOperationAction(ISD::FROUND, MVT::f128, Legal);

        setOperationAction(ISD::SELECT, MVT::f128, Expand);
        setOperationAction(ISD::FP_ROUND, MVT::f64, Legal);
        setOperationAction(ISD::FP_ROUND, MVT::f32, Legal);
        setTruncStoreAction(MVT::f128, MVT::f64, Expand);
        setTruncStoreAction(MVT::f128, MVT::f32, Expand);
        setOperationAction(ISD::BITCAST, MVT::i128, Custom);
        // No implementation for these ops for PowerPC.
        setOperationAction(ISD::FSIN, MVT::f128, Expand);
        setOperationAction(ISD::FCOS, MVT::f128, Expand);
        setOperationAction(ISD::FPOW, MVT::f128, Expand);
        setOperationAction(ISD::FPOWI, MVT::f128, Expand);
        setOperationAction(ISD::FREM, MVT::f128, Expand);
      }
      setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
    }

    if (Subtarget.hasP9Altivec()) {
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
    }
  }

  if (Subtarget.hasQPX()) {
    setOperationAction(ISD::FADD, MVT::v4f64, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f64, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f64, Legal);
    setOperationAction(ISD::FREM, MVT::v4f64, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f64, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f64, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f64, Custom);
    setOperationAction(ISD::STORE, MVT::v4f64, Custom);

    setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
    setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f64, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f64, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f64, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f64, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f64, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f64, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f64, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f64, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f64, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f64, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f64, Expand);

    setOperationAction(ISD::FP_ROUND, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_EXTEND, MVT::v4f64, Legal);

    setOperationAction(ISD::FNEG, MVT::v4f64, Legal);
    setOperationAction(ISD::FABS, MVT::v4f64, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f64, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f64, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f64, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f64, Legal);

    addRegisterClass(MVT::v4f64, &PPC::QFRCRegClass);

    setOperationAction(ISD::FADD, MVT::v4f32, Legal);
    setOperationAction(ISD::FSUB, MVT::v4f32, Legal);
    setOperationAction(ISD::FMUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FREM, MVT::v4f32, Expand);

    setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Legal);
    setOperationAction(ISD::FGETSIGN, MVT::v4f32, Expand);

    setOperationAction(ISD::LOAD, MVT::v4f32, Custom);
    setOperationAction(ISD::STORE, MVT::v4f32, Custom);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4f32, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4f32, Legal);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Legal);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4f32, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Expand);

    setOperationAction(ISD::FNEG, MVT::v4f32, Legal);
    setOperationAction(ISD::FABS, MVT::v4f32, Legal);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);

    setOperationAction(ISD::FMINNUM, MVT::v4f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::v4f32, Legal);

    setIndexedLoadAction(ISD::PRE_INC, MVT::v4f32, Legal);
    setIndexedStoreAction(ISD::PRE_INC, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::QSRCRegClass);

    setOperationAction(ISD::AND, MVT::v4i1, Legal);
    setOperationAction(ISD::OR, MVT::v4i1, Legal);
    setOperationAction(ISD::XOR, MVT::v4i1, Legal);

    if (!Subtarget.useCRBits())
      setOperationAction(ISD::SELECT, MVT::v4i1, Expand);
    setOperationAction(ISD::VSELECT, MVT::v4i1, Legal);

    setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
    setOperationAction(ISD::STORE, MVT::v4i1, Custom);

    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i1, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i1, Expand);
    setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Expand);
    setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i1, Expand);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);

    setOperationAction(ISD::SINT_TO_FP, MVT::v4i1, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i1, Custom);

    addRegisterClass(MVT::v4i1, &PPC::QBRCRegClass);

    setOperationAction(ISD::FFLOOR, MVT::v4f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f64, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FROUND, MVT::v4f32, Legal);

    setOperationAction(ISD::FNEARBYINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);

    // These need to set FE_INEXACT, and so cannot be vectorized here.
    setOperationAction(ISD::FRINT, MVT::v4f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f64, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Legal);

      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    } else {
      setOperationAction(ISD::FDIV, MVT::v4f64, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f64, Expand);

      setOperationAction(ISD::FDIV, MVT::v4f32, Expand);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    }
  }

  if (Subtarget.has64BitSupport())
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, isPPC64 ? Legal : Custom);

  if (!isPPC64) {
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasAltivec()) {
    // Altivec instructions set fields to all zeros or all ones.
    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  }

  if (!isPPC64) {
    // These libcalls are not available in 32-bit.
    setLibcallName(RTLIB::SHL_I128, nullptr);
    setLibcallName(RTLIB::SRL_I128, nullptr);
    setLibcallName(RTLIB::SRA_I128, nullptr);
  }

  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SHL);
  setTargetDAGCombine(ISD::SRA);
  setTargetDAGCombine(ISD::SRL);
  setTargetDAGCombine(ISD::MUL);
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::BUILD_VECTOR);
  if (Subtarget.hasFPCVT())
    setTargetDAGCombine(ISD::UINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  if (Subtarget.useCRBits())
    setTargetDAGCombine(ISD::BRCOND);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
  setTargetDAGCombine(ISD::INTRINSIC_VOID);

  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  setTargetDAGCombine(ISD::TRUNCATE);
  setTargetDAGCombine(ISD::VECTOR_SHUFFLE);

  if (Subtarget.useCRBits()) {
    setTargetDAGCombine(ISD::TRUNCATE);
    setTargetDAGCombine(ISD::SETCC);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  // Use reciprocal estimates.
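  // (Under unsafe FP math, FDIV and FSQRT are rewritten as sequences built
  // from the fre/frsqrte estimate instructions plus Newton-Raphson
  // refinement.)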
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  if (Subtarget.hasP9Altivec()) {
    setTargetDAGCombine(ISD::ABS);
    setTargetDAGCombine(ISD::VSELECT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget.isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  if (EnableQuadPrecision) {
    setLibcallName(RTLIB::LOG_F128, "logf128");
    setLibcallName(RTLIB::LOG2_F128, "log2f128");
    setLibcallName(RTLIB::LOG10_F128, "log10f128");
    setLibcallName(RTLIB::EXP_F128, "expf128");
    setLibcallName(RTLIB::EXP2_F128, "exp2f128");
    setLibcallName(RTLIB::SIN_F128, "sinf128");
    setLibcallName(RTLIB::COS_F128, "cosf128");
    setLibcallName(RTLIB::POW_F128, "powf128");
    setLibcallName(RTLIB::FMIN_F128, "fminf128");
    setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
    setLibcallName(RTLIB::POWI_F128, "__powikf2");
    setLibcallName(RTLIB::REM_F128, "fmodf128");
  }

  // With 32 condition bits, we don't need to sink (and duplicate) compares
  // aggressively in CodeGenPrep.
  if (Subtarget.useCRBits()) {
    setHasMultipleConditionRegisters();
    setJumpIsExpensive();
  }

  setMinFunctionAlignment(Align(4));
  if (Subtarget.isDarwin())
    setPrefFunctionAlignment(Align(16));

  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_A2:
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
    setPrefLoopAlignment(Align(16));
    setPrefFunctionAlignment(Align(16));
    break;
  }

  if (Subtarget.enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties(STI.getRegisterInfo());

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget.getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;
  } else if (Subtarget.getDarwinDirective() == PPC::DIR_A2) {
    // The A2 also benefits from (very) aggressive inlining of memcpy and
    // friends. The overhead of the function call, even when warm, can be
    // over one hundred cycles.
    MaxStoresPerMemset = 128;
    MaxStoresPerMemcpy = 128;
    MaxStoresPerMemmove = 128;
    MaxLoadsPerMemcmp = 128;
  } else {
    MaxLoadsPerMemcmp = 8;
    MaxLoadsPerMemcmpOptSize = 4;
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (auto *EltTy : STy->elements()) {
      unsigned EltAlign = 0;
      getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty,
                                                  const DataLayout &DL) const {
  // Darwin passes everything on 4 byte boundary.
  if (Subtarget.isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  unsigned Align = Subtarget.isPPC64() ? 8 : 4;
  if (Subtarget.hasAltivec() || Subtarget.hasQPX())
    getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16);
  return Align;
}

bool PPCTargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}

bool PPCTargetLowering::hasSPE() const {
  return Subtarget.hasSPE();
}

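// The forms (add (add x, 1), y) and (sub y, (xor x, -1)) are equivalent;
// returning true keeps the IR-canonical increment form, which we do only
// for scalar integer types.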
bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
  return VT.isScalarInteger();
}

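/// Return a readable name for the given target-specific PPCISD node, used
/// when dumping SelectionDAGs for debugging.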
1293 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode
) const {
1294 switch ((PPCISD::NodeType
)Opcode
) {
1295 case PPCISD::FIRST_NUMBER
: break;
1296 case PPCISD::FSEL
: return "PPCISD::FSEL";
1297 case PPCISD::FCFID
: return "PPCISD::FCFID";
1298 case PPCISD::FCFIDU
: return "PPCISD::FCFIDU";
1299 case PPCISD::FCFIDS
: return "PPCISD::FCFIDS";
1300 case PPCISD::FCFIDUS
: return "PPCISD::FCFIDUS";
1301 case PPCISD::FCTIDZ
: return "PPCISD::FCTIDZ";
1302 case PPCISD::FCTIWZ
: return "PPCISD::FCTIWZ";
1303 case PPCISD::FCTIDUZ
: return "PPCISD::FCTIDUZ";
1304 case PPCISD::FCTIWUZ
: return "PPCISD::FCTIWUZ";
1305 case PPCISD::FP_TO_UINT_IN_VSR
:
1306 return "PPCISD::FP_TO_UINT_IN_VSR,";
  case PPCISD::FP_TO_SINT_IN_VSR:
                                return "PPCISD::FP_TO_SINT_IN_VSR";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::XXSPLT:          return "PPCISD::XXSPLT";
  case PPCISD::VECINSERT:       return "PPCISD::VECINSERT";
  case PPCISD::XXREVERSE:       return "PPCISD::XXREVERSE";
  case PPCISD::XXPERMDI:        return "PPCISD::XXPERMDI";
  case PPCISD::VECSHL:          return "PPCISD::VECSHL";
  case PPCISD::CMPB:            return "PPCISD::CMPB";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::ATOMIC_CMP_SWAP_8:  return "PPCISD::ATOMIC_CMP_SWAP_8";
  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::DYNAREAOFFSET:   return "PPCISD::DYNAREAOFFSET";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::SRA_ADDZE:       return "PPCISD::SRA_ADDZE";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::BCTRL_LOAD_TOC:  return "PPCISD::BCTRL_LOAD_TOC";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::READ_TIME_BASE:  return "PPCISD::READ_TIME_BASE";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF:          return "PPCISD::MFOCRF";
  case PPCISD::MFVSR:           return "PPCISD::MFVSR";
  case PPCISD::MTVSRA:          return "PPCISD::MTVSRA";
  case PPCISD::MTVSRZ:          return "PPCISD::MTVSRZ";
  case PPCISD::SINT_VEC_TO_FP:  return "PPCISD::SINT_VEC_TO_FP";
  case PPCISD::UINT_VEC_TO_FP:  return "PPCISD::UINT_VEC_TO_FP";
  case PPCISD::ANDIo_1_EQ_BIT:  return "PPCISD::ANDIo_1_EQ_BIT";
  case PPCISD::ANDIo_1_GT_BIT:  return "PPCISD::ANDIo_1_GT_BIT";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LFIWAX:          return "PPCISD::LFIWAX";
  case PPCISD::LFIWZX:          return "PPCISD::LFIWZX";
  case PPCISD::LXSIZX:          return "PPCISD::LXSIZX";
  case PPCISD::STXSIX:          return "PPCISD::STXSIX";
  case PPCISD::VEXTS:           return "PPCISD::VEXTS";
  case PPCISD::SExtVElems:      return "PPCISD::SExtVElems";
  case PPCISD::LXVD2X:          return "PPCISD::LXVD2X";
  case PPCISD::STXVD2X:         return "PPCISD::STXVD2X";
  case PPCISD::LOAD_VEC_BE:     return "PPCISD::LOAD_VEC_BE";
  case PPCISD::STORE_VEC_BE:    return "PPCISD::STORE_VEC_BE";
  case PPCISD::ST_VSR_SCAL_INT: return "PPCISD::ST_VSR_SCAL_INT";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::PPC32_GOT:       return "PPCISD::PPC32_GOT";
  case PPCISD::PPC32_PICGOT:    return "PPCISD::PPC32_PICGOT";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  case PPCISD::CLRBHRB:         return "PPCISD::CLRBHRB";
  case PPCISD::MFBHRBE:         return "PPCISD::MFBHRBE";
  case PPCISD::RFEBB:           return "PPCISD::RFEBB";
  case PPCISD::XXSWAPD:         return "PPCISD::XXSWAPD";
  case PPCISD::SWAP_NO_CHAIN:   return "PPCISD::SWAP_NO_CHAIN";
  case PPCISD::VABSD:           return "PPCISD::VABSD";
  case PPCISD::QVFPERM:         return "PPCISD::QVFPERM";
  case PPCISD::QVGPCI:          return "PPCISD::QVGPCI";
  case PPCISD::QVALIGNI:        return "PPCISD::QVALIGNI";
  case PPCISD::QVESPLATI:       return "PPCISD::QVESPLATI";
  case PPCISD::QBFLT:           return "PPCISD::QBFLT";
  case PPCISD::QVLFSb:          return "PPCISD::QVLFSb";
  case PPCISD::BUILD_FP128:     return "PPCISD::BUILD_FP128";
  case PPCISD::BUILD_SPE64:     return "PPCISD::BUILD_SPE64";
  case PPCISD::EXTRACT_SPE:     return "PPCISD::EXTRACT_SPE";
  case PPCISD::EXTSWSLI:        return "PPCISD::EXTSWSLI";
  case PPCISD::LD_VSX_LH:       return "PPCISD::LD_VSX_LH";
  case PPCISD::FP_EXTEND_HALF:  return "PPCISD::FP_EXTEND_HALF";
  case PPCISD::LD_SPLAT:        return "PPCISD::LD_SPLAT";
  }
  return nullptr;
}
EVT PPCTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &C,
                                          EVT VT) const {
  if (!VT.isVector())
    return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;

  if (Subtarget.hasQPX())
    return EVT::getVectorVT(C, MVT::i1, VT.getVectorNumElements());

  return VT.changeVectorElementTypeToInteger();
}
bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT) const {
  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
  return true;
}
//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),   i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  return true;
}
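// Illustrative note (not from the original source): for ShuffleKind 0 on a
// big-endian target, isVPKUHUMShuffleMask accepts the v16i8 mask that keeps
// the odd bytes of both concatenated inputs, e.g.
//   <1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31>
// which is exactly the byte selection a vpkuhum of two registers performs.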
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                               SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2)   ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2),  i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3),  i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+j)   ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
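// Illustrative note (not from the original source): with UnitSize == 1 and
// ShuffleKind 0 on big-endian, isVMRGLShuffleMask matches the byte merge
//   <8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31>
// (LHSStart == 8, RHSStart == 24), i.e. the low halves of the two inputs
// interleaved byte by byte, which is what vmrglb produces.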
/**
 * Common function used to match vmrgew and vmrgow shuffles
 *
 * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target.
 *
 * - Little Endian:
 *   - Use offset of 0 to check for odd elements
 *   - Use offset of 4 to check for even elements
 * - Big Endian:
 *   - Use offset of 0 to check for even elements
 *   - Use offset of 4 to check for odd elements
 * A detailed description of the vector element ordering for little endian and
 * big endian can be found at
 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
 * Targeting your applications - what little endian and big endian IBM XL C/C++
 * compiler differences mean to you
 *
 * The mask to the shuffle vector instruction specifies the indices of the
 * elements from the two input vectors to place in the result. The elements are
 * numbered in array-access order, starting with the first vector. These vectors
 * are always of type v16i8, thus each vector will contain 16 byte-sized
 * elements. More info on the shuffle vector can be found in the
 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
 * Language Reference.
 *
 * The RHSStartValue indicates whether the same input vectors are used (unary)
 * or two different input vectors are used, based on the following:
 *   - If the instruction uses the same vector for both inputs, the range of the
 *     indices will be 0 to 15. In this case, the RHSStart value passed should
 *     be 0.
 *   - If the instruction has two different vectors then the range of the
 *     indices will be 0 to 31. In this case, the RHSStart value passed should
 *     be 16 (indices 0-15 specify elements in the first vector while indices 16
 *     to 31 specify elements in the second vector).
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] IndexOffset Specifies whether to look for even or odd elements
 * \param[in] RHSStartValue Specifies the starting index for the righthand input
 * vector to the shuffle_vector instruction
 * \return true iff this shuffle vector represents an even or odd word merge
 */
static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
                     unsigned RHSStartValue) {
  if (N->getValueType(0) != MVT::v16i8)
    return false;

  for (unsigned i = 0; i < 2; ++i)
    for (unsigned j = 0; j < 4; ++j)
      if (!isConstantOrUndef(N->getMaskElt(i*4+j),
                             i*RHSStartValue+j+IndexOffset) ||
          !isConstantOrUndef(N->getMaskElt(i*4+j+8),
                             i*RHSStartValue+j+IndexOffset+8))
        return false;
  return true;
}
/**
 * Determine if the specified shuffle mask is suitable for the vmrgew or
 * vmrgow instructions.
 *
 * \param[in] N The shuffle vector SD Node to analyze
 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
 * \param[in] ShuffleKind Identify the type of merge:
 *   - 0 = big-endian merge with two different inputs;
 *   - 1 = either-endian merge with two identical inputs;
 *   - 2 = little-endian merge with two different inputs (inputs are swapped for
 *     little-endian merges).
 * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask represents the requested even or odd
 * word merge
 */
bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                              unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  } else {
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
}
/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
/// The ShuffleKind distinguishes between big-endian operations with two
/// different inputs (0), either-endian operations with two identical inputs
/// (1), and little-endian operations with two different inputs (2). For the
/// latter, the input operands are swapped (see PPCInstrAltivec.td).
int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                             SelectionDAG &DAG) {
  if (N->getValueType(0) != MVT::v16i8)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  bool isLE = DAG.getDataLayout().isLittleEndian();

  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else if (ShuffleKind == 1) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  } else
    return -1;

  if (isLE)
    ShiftAmt = 16 - ShiftAmt;

  return ShiftAmt;
}
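// Illustrative note (not from the original source): for a big-endian
// two-input shuffle (ShuffleKind 0), the mask <3, 4, 5, ..., 18> starts at
// element 3 and is consecutive, so the routine returns a shift amount of 3
// (the vsldoi byte count); on little-endian targets the value is mirrored to
// 16 - ShiftAmt before being returned.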
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements.  So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
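// Illustrative note (not from the original source): a 4-byte splat of word 2
// of the first input is expressed on v16i8 as
//   <8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11, 8, 9, 10, 11>
// isSplatShuffleMask(N, 4) accepts this because the first EltSize indices are
// consecutive and every later group of EltSize indices repeats the first one.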
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *N, unsigned Width,
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  assert((StepLen == 1 || StepLen == -1) && "Unexpected step length.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; //  Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else {                                          // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}

bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 2);
}

bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 4);
}

bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 8);
}

bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode *N) {
  return isXXBRShuffleMaskHelper(N, 16);
}
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &DM,
                                bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
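// Illustrative note (not from the original source): on big-endian, a v16i8
// mask selecting doubleword 0 of the first input followed by doubleword 1 of
// the second input gives M0 == 0 and M1 == 3, so Swap stays false and
// DM == (0 << 1) + (3 & 1) == 1, matching the xxpermdi immediate encoding.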
/// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
/// appropriate for PPC mnemonics (which have a big endian bias - namely
/// elements are counted from the left of the vector register).
unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                         SelectionDAG &DAG) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  if (DAG.getDataLayout().isLittleEndian())
    return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
  else
    return SVOp->getMaskElt(0) / EltSize;
}
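// Illustrative note (not from the original source): for a word splat
// (EltSize == 4) whose mask starts at byte 4, the splat index is 1 on
// big-endian but (16/4) - 1 - 1 == 2 on little-endian, because PPC splat
// mnemonics count elements from the left end of the vector register.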
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(nullptr, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue;  // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)                                  // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                            // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
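// Illustrative note (not from the original source): a v8i16 build_vector of
// eight copies of the constant 5 can be materialized with "vspltish 5", so
// get_VSPLTI_elt(N, 2, DAG) returns a target constant of 5; a splat of zero
// is intentionally rejected because vxor is preferred for all-zero vectors.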
/// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isQVALIGNIShuffleMask(SDNode *N) {
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4f64 && VT != MVT::v4f32 && VT != MVT::v4i1)
    return -1;

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 4 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 4) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  // Check the rest of the elements to see if they are consecutive.
  for (++i; i != 4; ++i)
    if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
      return -1;

  return ShiftAmt;
}
//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}
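// Illustrative note (not from the original source): for a 32-bit constant
// node holding 0xFFFF8000, the truncated Imm is -32768 and sign-extending it
// reproduces the original value, so isIntS16Immediate returns true; for
// 0x00018000 the round trip fails and the helper returns false.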
/// SelectAddressEVXRegReg - Given the specified address, check to see if it can
/// be represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N, SDValue &Base,
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
       UI != E; ++UI) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(*UI)) {
      if (Memop->getMemoryVT() == MVT::f64) {
          Base = N.getOperand(0);
          Index = N.getOperand(1);
          return true;
      }
    }
  }
  return false;
}
/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
/// non-zero and N can be represented by a base register plus a signed 16-bit
/// displacement, make a more precise judgement by checking (displacement % \p
/// EncodingAlignment).
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index, SelectionDAG &DAG,
                                            unsigned EncodingAlignment) const {
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
        return true;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || !(imm % EncodingAlignment)))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || !(imm % EncodingAlignment)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // change the result.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  unsigned Align = MFI.getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}
/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If \p EncodingAlignment is non-zero, only accept
/// displacements that are multiples of that value.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            unsigned EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || (imm % EncodingAlignment) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // change the result.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || (Imm % EncodingAlignment) == 0)) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment || (CN->getZExtValue() % EncodingAlignment) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true;      // [r+0]
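}

// Illustrative note (not from the original source): for an address of the
// form (add %reg, 32) with EncodingAlignment == 4, the displacement 32 is a
// multiple of 4, so the node is selected as [r+imm] with Disp = 32 and
// Base = %reg; if the offset had been 34, a DS-form user would fall back to
// the [r+r] path instead.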
/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add.  However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register.  We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
/// Returns true if we should use a direct load into vector instruction
/// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget& ST) {

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR)
      return false;

  return true;
}
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors (except
  // for QPX, which does have preinc r+r forms).
  if (VT.isVector()) {
    if (!Subtarget.hasQPX() || (VT != MVT::v4f64 && VT != MVT::v4f32)) {
      return false;
    } else if (SelectAddressRegRegOnly(Ptr, Offset, Base, DAG)) {
      AM = ISD::PRE_INC;
      return true;
    }
  }

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 0))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, 4))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Compute the HiOpFlags and LoOpFlags target MO flags for a label reference,
/// adding the PIC and non-lazy-pointer flags when they are required.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
                               unsigned &HiOpFlags, unsigned &LoOpFlags,
                               const GlobalValue *GV = nullptr) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model.
  if (IsPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && Subtarget.hasLazyResolverStub(GV)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }
}
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  SDLoc DL(HiPart);
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, DL, PtrVT);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}
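// Illustrative note (not from the original source): for a non-PIC 32-bit
// global @g, LowerLabelRef builds (PPCISD::Hi @g, 0) + (PPCISD::Lo @g, 0),
// which later selects to the familiar "lis r, g@ha" / "addi r, r, g@l"
// pair; with PIC the Hi half is first added to the GlobalBaseReg.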
static void setUsesTOCBasePtr(MachineFunction &MF) {
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}

static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  setUsesTOCBasePtr(DAG.getMachineFunction());
}
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
                        : Subtarget.isAIXABI()
                              ? DAG.getRegister(PPC::R2, VT)
                              : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  SDValue Ops[] = { GA, Reg };
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
      MachinePointerInfo::getGOT(DAG.getMachineFunction()), 0,
      MachineMemOperand::MOLoad);
}
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(),
                                           PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(CP), GA);
  }

  SDValue CPIHi =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
      DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
// For 64-bit PowerPC, prefer the more compact relative encodings.
// This trades 32 bits per jump table entry for one or two instructions
// on the jump site.
unsigned PPCTargetLowering::getJumpTableEncoding() const {
  if (isJumpTableRelative())
    return MachineJumpTableInfo::EK_LabelDifference32;

  return TargetLowering::getJumpTableEncoding();
}

bool PPCTargetLowering::isJumpTableRelative() const {
  if (Subtarget.isPPC64())
    return true;
  return TargetLowering::isJumpTableRelative();
}

SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
                                                    SelectionDAG &DAG) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}
const MCExpr *
PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                                unsigned JTI,
                                                MCContext &Ctx) const {
  if (!Subtarget.isPPC64())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
                                        PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {
  // FIXME: TLS addresses currently use medium model code sequences,
  // which is the most useful form.  Eventually support for small and
  // large models could be added if users need it, at the cost of
  // additional complexity.
  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  if (DAG.getTarget().useEmulatedTLS())
    return LowerToTLSEmulatedModel(GA, DAG);

  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool is64bit = Subtarget.isPPC64();
  const Module *M = DAG.getMachineFunction().getFunction().getParent();
  PICLevel::Level picLevel = M->getPICLevel();

  const TargetMachine &TM = getTargetMachine();
  TLSModel::Model Model = TM.getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL_LO);
    SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
                             : DAG.getRegister(PPC::R2, MVT::i32);

    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (Model == TLSModel::InitialExec) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                                PPCII::MO_TLS);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                           PtrVT, GOTReg, TGA);
    } else {
      if (!TM.isPositionIndependent())
        GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
      else if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, GOTPtr);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
  }

  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
                       GOTPtr, TGA, TGA);
  }

  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTPtr;
    if (is64bit) {
      setUsesTOCBasePtr(DAG);
      SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
      GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                           GOTReg, TGA);
    } else {
      if (picLevel == PICLevel::SmallPIC)
        GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
      else
        GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
    }
    SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
                                  PtrVT, GOTPtr, TGA, TGA);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
                                      PtrVT, TLSAddr, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}
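
// Lower a non-TLS global address. On the 64-bit ELF and AIX ABIs the address
// is loaded from the TOC; otherwise it is materialized with a high/low pair,
// possibly via the GOT when compiling position-independent 32-bit SVR4 code.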
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return getTOCEntry(DAG, DL, GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);

  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
                                            GSDN->getOffset(),
                                            PPCII::MO_PIC_FLAG);
    return getTOCEntry(DAG, DL, GA);
  }

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, IsPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  return Ptr;
}
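
// Lower ISD::SETCC. v2i64 equality compares are rewritten in terms of the
// legal Altivec v4i32 compare, and integer equality compares against a
// non-special constant become a compare of (lhs xor rhs) against zero.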
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  if (Op.getValueType() == MVT::v2i64) {
    // When the operands themselves are v2i64 values, we need to do something
    // special because VSX has no underlying comparison operations for these.
    if (Op.getOperand(0).getValueType() == MVT::v2i64) {
      // Equality can be handled by casting to the legal type for Altivec
      // comparisons, everything else needs to be expanded.
      if (CC == ISD::SETEQ || CC == ISD::SETNE) {
        return DAG.getNode(ISD::BITCAST, dl, MVT::v2i64,
                 DAG.getSetCC(dl, MVT::v4i32,
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(0)),
                   DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Op.getOperand(1)),
                   CC));
      }

      return SDValue();
    }

    // We handle most of these in the usual way.
    return Op;
  }

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
    return V;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
  }
  return SDValue();
}
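
// Lower VAARG for the 32-bit SVR4 ABI. The va_list gpr/fpr indices are loaded
// and updated in place, and the value is fetched either from the register
// save area or from the overflow area on the stack.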
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, dl, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, dl, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, dl, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                       DAG.getConstant(8, dl, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, dl, MVT::i32));

  // areas
  SDValue OverflowArea =
      DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea =
      DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, dl, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ? VAListPtr : FprPtr,
                              MachinePointerInfo(SV), MVT::i8);

  // determine if we should load from reg_save_area or overflow_area
  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);

  // increase overflow_area by 4/8 if gpr/fpr > 8
  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
                                          DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                          dl, MVT::i32));

  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
                             OverflowAreaPlusN);

  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
                              MachinePointerInfo(), MVT::i32);

  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
}
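
// Lower VACOPY for the 32-bit SVR4 ABI. Unlike the 64-bit ABIs, where a
// va_list is a plain pointer, the 32-bit SVR4 va_list is a 12-byte structure
// and must be copied as a block.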
SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");

  // We have to copy the entire va_list struct:
  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
  return DAG.getMemcpy(Op.getOperand(0), Op,
                       Op.getOperand(1), Op.getOperand(2),
                       DAG.getConstant(12, SDLoc(Op), MVT::i32), 8, false, true,
                       false, MachinePointerInfo(), MachinePointerInfo());
}
SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  return Op.getOperand(0);
}
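
// Lower INIT_TRAMPOLINE by calling the __trampoline_setup runtime helper with
// the trampoline buffer, its size, the nested function, and the 'nest' value.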
SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Trmp = Op.getOperand(1); // trampoline
  SDValue FPtr = Op.getOperand(2); // nested function
  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
  SDLoc dl(Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = (PtrVT == MVT::i64);
  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Ty = IntPtrTy;
  Entry.Node = Trmp; Args.push_back(Entry);

  // TrampSize == (isPPC64 ? 48 : 40);
  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
                               isPPC64 ? MVT::i64 : MVT::i32);
  Args.push_back(Entry);

  Entry.Node = FPtr; Args.push_back(Entry);
  Entry.Node = Nest; Args.push_back(Entry);

  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));

  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
  return CallResult.second;
}
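
// Lower VASTART. For 64-bit and Darwin targets this just stores the address of
// the vararg frame slot; for 32-bit SVR4 it initializes the full four-field
// va_list structure described below.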
SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  SDLoc dl(Op);

  if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];

  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
                                            PtrVT);
  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);

  uint64_t FPROffset = 1;
  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDValue firstStore =
      DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
                        MachinePointerInfo(SV), MVT::i8);
  uint64_t nextOffset = FPROffset;
  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
                                ConstFPROffset);

  // Store second byte : number of float regs
  SDValue secondStore =
      DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
                        MachinePointerInfo(SV, nextOffset), MVT::i8);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
                                    MachinePointerInfo(SV, nextOffset));
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, dl, FR, nextPtr,
                      MachinePointerInfo(SV, nextOffset));
}
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX.
static const MCPhysReg FPR[] = {PPC::F1,  PPC::F2,  PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6,  PPC::F7,  PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};

/// QFPR - The set of QPX registers that should be allocated for arguments.
static const MCPhysReg QFPR[] = {
    PPC::QF1, PPC::QF2, PPC::QF3,  PPC::QF4,  PPC::QF5,  PPC::QF6, PPC::QF7,
    PPC::QF8, PPC::QF9, PPC::QF10, PPC::QF11, PPC::QF12, PPC::QF13};
/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getStoreSize();
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();

  // Round up to multiples of the pointer size, except for array members,
  // which are always packed.
  if (!Flags.isInConsecutiveRegs())
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}
/// CalculateStackSlotAlignment - Calculates the alignment of this argument
/// on the stack.
static unsigned CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT,
                                            ISD::ArgFlagsTy Flags,
                                            unsigned PtrByteSize) {
  unsigned Align = PtrByteSize;

  // Altivec parameters are padded to a 16 byte boundary.
  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
      ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
      ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
      ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
    Align = 16;
  // QPX vector types stored in double-precision are padded to a 32 byte
  // boundary.
  else if (ArgVT == MVT::v4f64 || ArgVT == MVT::v4i1)
    Align = 32;

  // ByVal parameters are aligned as requested.
  if (Flags.isByVal()) {
    unsigned BVAlign = Flags.getByValAlign();
    if (BVAlign > PtrByteSize) {
      if (BVAlign % PtrByteSize != 0)
        llvm_unreachable(
            "ByVal alignment is not a multiple of the pointer size");

      Align = BVAlign;
    }
  }

  // Array members are always packed to their original alignment.
  if (Flags.isInConsecutiveRegs()) {
    // If the array member was split into multiple registers, the first
    // needs to be aligned to the size of the full type.  (Except for
    // ppcf128, which is only aligned as its f64 components.)
    if (Flags.isSplit() && OrigVT != MVT::ppcf128)
      Align = OrigVT.getStoreSize();
    else
      Align = ArgVT.getStoreSize();
  }

  return Align;
}
/// CalculateStackSlotUsed - Return whether this argument will use its
/// stack slot (instead of being passed in registers).  ArgOffset,
/// AvailableFPRs, and AvailableVRs must hold the current argument
/// position, and will be updated to account for this argument.
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT,
                                   ISD::ArgFlagsTy Flags,
                                   unsigned PtrByteSize,
                                   unsigned LinkageSize,
                                   unsigned ParamAreaSize,
                                   unsigned &ArgOffset,
                                   unsigned &AvailableFPRs,
                                   unsigned &AvailableVRs, bool HasQPX) {
  bool UseMemory = false;

  // Respect alignment of argument on the stack.
  unsigned Align =
    CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
  ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
  // If there's no space left in the argument save area, we must
  // use memory (this check also catches zero-sized arguments).
  if (ArgOffset >= LinkageSize + ParamAreaSize)
    UseMemory = true;

  // Allocate argument on the stack.
  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
  if (Flags.isInConsecutiveRegsLast())
    ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
  // If we overran the argument save area, we must use memory
  // (this check catches arguments passed partially in memory)
  if (ArgOffset > LinkageSize + ParamAreaSize)
    UseMemory = true;

  // However, if the argument is actually passed in an FPR or a VR,
  // we don't use memory after all.
  if (!Flags.isByVal()) {
    if (ArgVT == MVT::f32 || ArgVT == MVT::f64 ||
        // QPX registers overlap with the scalar FP registers.
        (HasQPX && (ArgVT == MVT::v4f32 ||
                    ArgVT == MVT::v4f64 ||
                    ArgVT == MVT::v4i1)))
      if (AvailableFPRs > 0) {
        --AvailableFPRs;
        return false;
      }
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
        ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
      if (AvailableVRs > 0) {
        --AvailableVRs;
        return false;
      }
  }

  return UseMemory;
}
/// EnsureStackAlignment - Round stack frame size up from NumBytes to
/// ensure minimum alignment required for target.
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering,
                                     unsigned NumBytes) {
  unsigned TargetAlign = Lowering->getStackAlignment();
  unsigned AlignMask = TargetAlign - 1;
  NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  return NumBytes;
}
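
// Top-level formal-argument lowering: dispatch to the 64-bit ELF, 32-bit ELF,
// or Darwin/AIX implementation based on the subtarget ABI.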
SDValue PPCTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  if (Subtarget.is64BitELFABI())
    return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);
  else if (Subtarget.is32BitELFABI())
    return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                       InVals);

  // FIXME: We are using this for both AIX and Darwin. We should add appropriate
  // AIX testing, and rename it appropriately.
  return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, dl, DAG,
                                     InVals);
}
SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                    *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeFormalArguments(Ins);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
  CCInfo.clearWasPPCF128();

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i1:
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          if (Subtarget.hasP8Vector())
            RC = &PPC::VSSRCRegClass;
          else if (Subtarget.hasSPE())
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          if (Subtarget.hasVSX())
            RC = &PPC::VSFRCRegClass;
          else if (Subtarget.hasSPE())
            // SPE passes doubles in GPR pairs.
            RC = &PPC::GPRCRegClass;
          else
            RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f32:
          RC = Subtarget.hasQPX() ? &PPC::QSRCRegClass : &PPC::VRRCRegClass;
          break;
        case MVT::v2f64:
        case MVT::v2i64:
          RC = &PPC::VRRCRegClass;
          break;
        case MVT::v4f64:
          RC = &PPC::QFRCRegClass;
          break;
        case MVT::v4i1:
          RC = &PPC::QBRCRegClass;
          break;
      }

      SDValue ArgValue;
      // Transform the arguments stored in physical registers into
      // virtual ones.
      if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
        assert(i + 1 < e && "No second half of double precision argument");
        unsigned RegLo = MF.addLiveIn(VA.getLocReg(), RC);
        unsigned RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
        SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
        SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
        if (!Subtarget.isLittleEndian())
          std::swap (ArgValueLo, ArgValueHi);
        ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
                               ArgValueHi);
      } else {
        unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
        ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
                                      ValVT == MVT::i1 ? MVT::i32 : ValVT);
        if (ValVT == MVT::i1)
          ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
      }

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      // Get the extended size of the argument type in stack
      unsigned ArgSize = VA.getLocVT().getStoreSize();
      // Get the actual size of the argument type
      unsigned ObjSize = VA.getValVT().getStoreSize();
      unsigned ArgOffset = VA.getLocMemOffset();
      // Stack objects in PPC32 are right justified.
      ArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(
          DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
  MinReservedArea = std::max(MinReservedArea, LinkageSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack size.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const MCPhysReg GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const MCPhysReg FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    if (useSoftFloat() || hasSPE())
      NumFPArgRegs = 0;

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
                                             EVT ObjectVT, SelectionDAG &DAG,
                                             SDValue ArgVal,
                                             const SDLoc &dl) const {
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
}
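
// Formal-argument lowering for the 64-bit SVR4 (ELFv1/ELFv2) ABIs. In the
// ELFv2 ABI the caller may omit the parameter save area when every argument
// fits in registers; the HasParameterArea pre-pass below determines whether
// the save area can be assumed to exist.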
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);
  const unsigned Num_QFPR_Regs = Num_FPR_Regs;

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame.  In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  unsigned &QFPR_idx = FPR_idx;
  SmallVector<SDValue, 8> MemOps;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset, Align;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack.  */
      Align = CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = ((ArgOffset + Align - 1) / Align) * Align;
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset.  */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Align, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                      MachinePointerInfo(&*FuncArg), ObjType);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(&*FuncArg));
          }

          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        unsigned VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.
        if (VR_idx != Num_VR_Regs) {
          unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
          ++VR_idx;
        } else {
          if (CallConv == CallingConv::Fast)
            ComputeArgOffset();
          needsLoad = true;
        }
        if (CallConv != CallingConv::Fast || needsLoad)
          ArgOffset += 16;
        break;
      } // not QPX

      assert(ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      LLVM_FALLTHROUGH;

    case MVT::v4f64:
    case MVT::v4i1:
      // QPX vectors are treated like their scalar floating-point subregisters
      // (except that they're larger).
      unsigned Sz = ObjectVT.getSimpleVT().SimpleTy == MVT::v4f32 ? 16 : 32;
      if (QFPR_idx != Num_QFPR_Regs) {
        const TargetRegisterClass *RC;
        switch (ObjectVT.getSimpleVT().SimpleTy) {
        case MVT::v4f64: RC = &PPC::QFRCRegClass; break;
        case MVT::v4f32: RC = &PPC::QSRCRegClass; break;
        default:         RC = &PPC::QBRCRegClass; break;
        }

        unsigned VReg = MF.addLiveIn(QFPR[QFPR_idx], RC);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++QFPR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += Sz;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack size.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
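
// Formal-argument lowering for the Darwin ABI (currently also used for AIX;
// see the FIXME in LowerFormalArguments). In 32-bit non-varargs functions,
// vector arguments that spill to memory live in a separate area computed
// after all non-vector arguments.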
SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned ArgOffset = LinkageSize;
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e;
         ++ArgNo) {
      EVT ObjectVT = Ins[ArgNo].VT;
      ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        unsigned ObjSize = Flags.getByValSize();
        unsigned ArgSize =
                ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unhandled argument type!");
      case MVT::i1:
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        // FIXME: We are guaranteed to be !isPPC64 at this point.
        // Does MVT::i64 apply?
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
        break;
      }
    }
  }
  // We've found where the vector parameter area in memory is.  Skip the
  // first 12 parameters; these don't use that memory.
  VecArgOffset = ((VecArgOffset+15)/16)*16;
  VecArgOffset += 12*16;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg || isPPC64) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else  nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Objects of size 1 and 2 are right justified, everything else is
      // left justified.  This means the memory address is adjusted forwards.
      if (ObjSize==1 || ObjSize==2) {
        CurArgOffset = CurArgOffset + (4 - ObjSize);
      }
      // The value of the object is its address.
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);
      if (ObjSize==1 || ObjSize==2) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16;
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
          ++GPR_idx;
        }

        ArgOffset += PtrByteSize;

        continue;
      }
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          if (isPPC64)
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          else
            VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(&*FuncArg, j));
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
      if (!isPPC64) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
          ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);

          if (ObjectVT == MVT::i1)
            ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgVal);

          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // All int arguments reserve stack space in the Darwin ABI.
        ArgOffset += PtrByteSize;
        break;
      }
      LLVM_FALLTHROUGH;
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      // All int arguments reserve stack space in the Darwin ABI.
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // All FP arguments reserve stack space in the Darwin ABI.
      ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the nonvectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI.CreateFixedObject(ObjSize,
                                     CurArgOffset + (ArgSize - ObjSize),
                                     isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }

  // Area that is at least reserved in the caller of this function.
  MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize);

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack size.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
                            Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;

      if (isPPC64)
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      else
        VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
static bool isFunctionGlobalAddress(SDValue Callee);
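
// Conservatively determine whether the caller and callee can be assumed to
// share the same TOC base; when this cannot be proven, call lowering must
// account for a possible TOC switch at the call boundary.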
4473 callsShareTOCBase(const Function
*Caller
, SDValue Callee
,
4474 const TargetMachine
&TM
) {
4475 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4476 // don't have enough information to determine if the caller and calle share
4477 // the same TOC base, so we have to pessimistically assume they don't for
4479 GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
);
4483 const GlobalValue
*GV
= G
->getGlobal();
4484 // The medium and large code models are expected to provide a sufficiently
4485 // large TOC to provide all data addressing needs of a module with a
4486 // single TOC. Since each module will be addressed with a single TOC then we
4487 // only need to check that caller and callee don't cross dso boundaries.
4488 if (CodeModel::Medium
== TM
.getCodeModel() ||
4489 CodeModel::Large
== TM
.getCodeModel())
4490 return TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
);
4492 // Otherwise we need to ensure callee and caller are in the same section,
4493 // since the linker may allocate multiple TOCs, and we don't know which
4494 // sections will belong to the same TOC base.
4496 if (!GV
->isStrongDefinitionForLinker())
4499 // Any explicitly-specified sections and section prefixes must also match.
4500 // Also, if we're using -ffunction-sections, then each function is always in
4501 // a different section (the same is true for COMDAT functions).
4502 if (TM
.getFunctionSections() || GV
->hasComdat() || Caller
->hasComdat() ||
4503 GV
->getSection() != Caller
->getSection())
4505 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
4506 if (F
->getSectionPrefix() != Caller
->getSectionPrefix())
4510 // If the callee might be interposed, then we can't assume the ultimate call
4511 // target will be in the same section. Even in cases where we can assume that
4512 // interposition won't happen, in any case where the linker might insert a
4513 // stub to allow for interposition, we must generate code as though
4514 // interposition might occur. To understand why this matters, consider a
4515 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4516 // in the same section, but a is in a different module (i.e. has a different
4517 // TOC base pointer). If the linker allows for interposition between b and c,
4518 // then it will generate a stub for the call edge between b and c which will
4519 // save the TOC pointer into the designated stack slot allocated by b. If we
4520 // return true here, and therefore allow a tail call between b and c, that
4521 // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4522 // pointer into the stack slot allocated by a (where the a -> b stub saved
4523 // a's TOC base pointer). If we're not considering a tail call, but rather,
4524 // whether a nop is needed after the call instruction in b, because the linker
4525 // will insert a stub, it might complain about a missing nop if we omit it
4526 // (although many don't complain in this case).
4527 if (!TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
))
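
// A hedged example of the rules above (function names are made up): under the
// medium or large code model,
//   static int helper(int x) { return x + 1; }
//   int driver(int x) { return helper(x); }
// in a single module lets callsShareTOCBase() return true, because the callee
// is known to be dso-local. Under the small code model, building the same pair
// with -ffunction-sections (or placing helper in its own section) puts caller
// and callee in different sections, so the function conservatively returns
// false, since the linker may then assign them different TOC bases.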
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.is64BitELFABI());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs,
                               Subtarget.hasQPX()))
      return true;
  }
  return false;
}
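
// Rough worked example for the helper above (64-bit ELF, 8-byte pointers,
// signature is hypothetical): a call that passes nine integer or pointer
// arguments,
//   void callee(long a1, long a2, long a3, long a4,
//               long a5, long a6, long a7, long a8, long a9);
// exhausts the eight argument GPRs X3-X10, so the ninth argument needs a slot
// in the parameter save area and this helper returns true; with eight or fewer
// such arguments (and enough FPRs/VRs for any floating-point or vector ones)
// it returns false.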
static bool
hasSameArgumentList(const Function *CallerFn, ImmutableCallSite CS) {
  if (CS.arg_size() != CallerFn->arg_size())
    return false;

  ImmutableCallSite::arg_iterator CalleeArgIter = CS.arg_begin();
  ImmutableCallSite::arg_iterator CalleeArgEnd = CS.arg_end();
  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();

  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
    const Value* CalleeArg = *CalleeArgIter;
    const Value* CallerArg = &(*CallerArgIter);
    if (CalleeArg == CallerArg)
      continue;

    // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
    //        tail call @callee([4 x i64] undef, [4 x i64] %b)
    //      }
    // 1st argument of callee is undef and has the same type as caller.
    if (CalleeArg->getType() == CallerArg->getType() &&
        isa<UndefValue>(CalleeArg))
      continue;

    return false;
  }

  return true;
}
// Returns true if TCO is possible between the caller's and callee's
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC = [](CallingConv::ID CC) {
    return CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
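
// Spelled out, the rule above allows (illustrative only; ccc = CallingConv::C,
// fastcc = CallingConv::Fast):
//   ccc caller    -> ccc or fastcc callee : eligible
//   fastcc caller -> fastcc callee        : eligible
//   fastcc caller -> ccc callee           : rejected (the caller may have less
//                                           stack space)
//   any other calling convention on either side: rejected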
bool
PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
                                    SDValue Callee,
                                    CallingConv::ID CalleeCC,
                                    ImmutableCallSite CS,
                                    bool isVarArg,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SelectionDAG& DAG) const {
  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;

  if (DisableSCO && !TailCallOpt) return false;

  // Variadic argument functions are not supported.
  if (isVarArg) return false;

  auto &Caller = DAG.getMachineFunction().getFunction();
  // Check that the calling conventions are compatible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
    return false;

  // A caller that takes any byval parameter is not supported.
  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
    return false;

  // A callee that takes any byval parameter is not supported either.
  // Note: This is a quick work around, because in some cases, e.g.
  // caller's stack size > callee's stack size, we are still able to apply
  // sibling call optimization. For example, gcc is able to do SCO for caller1
  // in the following example, but not for caller2.
  //   __attribute__((noinline)) int callee(struct test v, struct test *b) {
  //     ...
  //   }
  //   void caller1(struct test a, struct test c, struct test *b) {
  //     callee(gTest, b); }
  //   void caller2(struct test *b) { callee(gTest, b); }
  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
    return false;

  // If callee and caller use different calling conventions, we cannot pass
  // parameters on stack since offsets for the parameter area may be different.
  if (Caller.getCallingConv() != CalleeCC &&
      needStackSlotPassParameters(Subtarget, Outs))
    return false;

  // No TCO/SCO on indirect calls because the caller has to restore its TOC.
  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    return false;

  // If the caller and callee potentially have different TOC bases then we
  // cannot tail call since we need to restore the TOC pointer after the call.
  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
  if (!callsShareTOCBase(&Caller, Callee, getTargetMachine()))
    return false;

  // TCO allows altering callee ABI, so we don't have to check further.
  if (CalleeCC == CallingConv::Fast && TailCallOpt)
    return true;

  if (DisableSCO) return false;

  // If the callee uses the same argument list that the caller is using, then
  // we can apply SCO on this case. If it is not, then we need to check if the
  // callee needs stack for passing arguments.
  if (!hasSameArgumentList(&Caller, CS) &&
      needStackSlotPassParameters(Subtarget, Outs)) {
    return false;
  }

  return true;
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
      ISD::ArgFlagsTy Flags = Ins[i].Flags;
      if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}
/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}
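
// Worked example for the check above (values chosen for illustration): an
// absolute target of 0x2000 is word aligned and unchanged by SignExtend32<26>,
// so the returned constant encodes 0x2000 >> 2 = 0x800, which fits the LI
// field of an absolute branch. A target of 0x2001 (misaligned) or 0x10000000
// (not sign-extendable from 26 bits) yields nullptr and the call stays
// indirect.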
namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx = 0;

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
      int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
                                                         true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(
                               DAG.getMachineFunction(), NewFPIdx));
    }
  }
  return Chain;
}
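
// Rough worked example for the helper above (numbers are illustrative): if the
// caller's minimum reserved area is 64 bytes but the tail-called function's
// argument area needs 112 bytes, CalculateTailCallSPDiff() reports -48, the
// frame is grown by 48 bytes, and the saved LR is re-stored here at
// SPDiff + getReturnSaveOffset() so that it ends up at the usual offset from
// the adjusted stack pointer (the FP slot is handled the same way on Darwin).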
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;

  TailCallArguments.push_back(Info);
}
/// EmitTailCallLoadFPAndRetAddr - Emit loads from the frame pointer and return
/// address stack slots. Returns the chain as result and the loaded frame
/// pointers in LROpOut/FPOpOut. Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
    SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
    SDValue &FPOpOut, const SDLoc &dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (Subtarget.isDarwinABI()) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo());
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, false, MachinePointerInfo(),
                       MachinePointerInfo());
}
/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
/// tail calls.
static void LowerMemOpCallTo(
    SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
    SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
    bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
    SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
    }
    MemOpChains.push_back(
        DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
    // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}
static void
PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                SDValue FPOp,
                SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
// Is this global address that of a function that can be called by name? (as
// opposed to something that must hold a descriptor for an indirect call).
static bool isFunctionGlobalAddress(SDValue Callee) {
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
        Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
      return false;

    return G->getGlobal()->getValueType()->isFunctionTy();
  }

  return false;
}
static unsigned
PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
            SDValue CallSeqStart, const SDLoc &dl, int SPDiff, bool isTailCall,
            bool isPatchPoint, bool hasNest,
            SmallVectorImpl<std::pair<unsigned, SDValue>> &RegsToPass,
            SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
            ImmutableCallSite CS, const PPCSubtarget &Subtarget) {
  bool isPPC64 = Subtarget.isPPC64();
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool is64BitELFv1ABI = isPPC64 && isSVR4ABI && !Subtarget.isELFv2ABI();
  bool isAIXABI = Subtarget.isAIXABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (!isSVR4ABI || !isPPC64)
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
      // If this is an absolute destination address, use the munged value.
      Callee = SDValue(Dest, 0);
      needIndirectCall = false;
    }

  // PC-relative references to external symbols should go through $stub, unless
  // we're building with the leopard linker or later, which automatically
  // synthesizes these stubs.
  const TargetMachine &TM = DAG.getTarget();
  const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
  const GlobalValue *GV = nullptr;
  if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee))
    GV = G->getGlobal();
  bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
  bool UsePlt = !Local && Subtarget.isTargetELF() && !isPPC64;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress /
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (isFunctionGlobalAddress(Callee)) {
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);

    // A call to a TLS address is actually an indirect call to a
    // thread-specific pointer.
    unsigned OpFlags = 0;
    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                        Callee.getValueType(), 0, OpFlags);
    needIndirectCall = false;
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (UsePlt)
      OpFlags = PPCII::MO_PLT;

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (isPatchPoint) {
    // We'll form an invalid direct call when lowering a patchpoint; the full
    // sequence for an indirect call is complicated, and many of the
    // instructions introduced might have side effects (and, thus, can't be
    // removed later). The call itself will be removed as soon as the
    // argument/return lowering is complete, so the fact that it has the wrong
    // kind of operands should not really matter.
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (is64BitELFv1ABI) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // The loads are scheduled at the beginning of the call sequence, and the
      // register copies are flagged together to ensure that no other
      // operations can be scheduled in between. E.g. without flagging the
      // copies together, a TOC access in the caller could be scheduled between
      // the assignment of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.
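      //
      // As an illustrative sketch only (the scratch registers, and the ELFv1
      // TOC save offset of 40, are assumptions here; the scheduler may order
      // things differently), the machine code for steps 1-6 looks roughly
      // like:
      //   std   r2, 40(r1)   # 1. save caller TOC in its TOC save slot
      //   ld    rE, 0(rD)    # 2. entry point (rD holds the descriptor address)
      //   ld    r2, 8(rD)    # 3. callee TOC
      //   ld    r11, 16(rD)  # 4. environment pointer
      //   mtctr rE
      //   bctrl              # 5. branch to the entry point
      //   ld    r2, 40(r1)   # 6. restore caller TOC after the call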
      // Load the address of the function entry point from the function
      // descriptor.
      SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
      if (LDChain.getValueType() == MVT::Glue)
        LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);

      auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
                          ? (MachineMemOperand::MODereferenceable |
                             MachineMemOperand::MOInvariant)
                          : MachineMemOperand::MONone;

      MachinePointerInfo MPI(CS ? CS.getCalledValue() : nullptr);
      SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
                                        /* Alignment = */ 8, MMOFlags);

      // Load environment pointer into r11.
      SDValue PtrOff = DAG.getIntPtrConstant(16, dl);
      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddPtr, MPI.getWithOffset(16),
                      /* Alignment = */ 8, MMOFlags);

      SDValue TOCOff = DAG.getIntPtrConstant(8, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
      SDValue TOCPtr =
          DAG.getLoad(MVT::i64, dl, LDChain, AddTOC, MPI.getWithOffset(8),
                      /* Alignment = */ 8, MMOFlags);

      setUsesTOCBasePtr(DAG);
      SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
                                        InFlag);
      Chain = TOCVal.getValue(0);
      InFlag = TOCVal.getValue(1);

      // If the function call has an explicit 'nest' parameter, it takes the
      // place of the environment pointer.
      if (!hasNest) {
        SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                          InFlag);
        Chain = EnvVal.getValue(0);
        InFlag = EnvVal.getValue(1);
      }

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys,
                        makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(nullptr);
    // Add use of X11 (holding environment pointer)
    if (is64BitELFv1ABI && !hasNest)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call add stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
  // live into the call.
  // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
  if ((isSVR4ABI && isPPC64) || isAIXABI) {
    setUsesTOCBasePtr(DAG);

    // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
    // no way to mark dependencies as implicit here.
    // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
    if (!isPatchPoint)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::X2
                                            : PPC::R2, PtrVT));
  }

  return CallOpc;
}
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap(Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
SDValue PPCTargetLowering::FinishCall(
    CallingConv::ID CallConv, const SDLoc &dl, bool isTailCall, bool isVarArg,
    bool isPatchPoint, bool hasNest, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue InFlag,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, ImmutableCallSite CS) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
                                 SPDiff, isTailCall, isPatchPoint, hasNest,
                                 RegsToPass, Ops, NodeTys, CS, Subtarget);

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls.
  if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
      "Expecting a global address, external symbol, absolute value or register");

    DAG.getMachineFunction().getFrameInfo().setHasTailCall();
    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 or the AIX ABI.
  // At link time, if caller and callee are in a different module and
  // thus have a different TOC, the call will be replaced with a call to a stub
  // function which saves the current TOC, loads the TOC of the callee and
  // branches to the callee. The NOP will be replaced with a load instruction
  // which restores the TOC of the caller from the TOC save slot of the current
  // stack frame. If caller and callee belong to the same module (and have the
  // same TOC), the NOP will remain unchanged, or become some other NOP.
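  //
  // Illustrative sketch (the offset assumes ELFv2, where the TOC save slot is
  // at 24(r1); ELFv1 uses a different offset, and the stub name is made up):
  // a call that may cross TOC bases is emitted as
  //   bl callee
  //   nop
  // and the linker may rewrite it to
  //   bl callee.stub
  //   ld r2, 24(r1)
  // whereas a call that callsShareTOCBase() proves stays within one TOC keeps
  // its nop (or the linker turns it into some other nop).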
  MachineFunction &MF = DAG.getMachineFunction();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  if (!isTailCall && !isPatchPoint &&
      ((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
       Subtarget.isAIXABI())) {
    if (CallOpc == PPCISD::BCTRL) {
      if (Subtarget.isAIXABI())
        report_fatal_error("Indirect call on AIX is not implemented.");

      // This is a call through a function pointer.
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      CallOpc = PPCISD::BCTRL_LOAD_TOC;

      SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);

      // The address needs to go after the chain input but before the flag (or
      // any other variadic arguments).
      Ops.insert(std::next(Ops.begin()), AddTOC);
    } else if (CallOpc == PPCISD::CALL &&
               !callsShareTOCBase(&MF.getFunction(), Callee, DAG.getTarget())) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  if (Subtarget.isAIXABI() && isFunctionGlobalAddress(Callee)) {
    // On AIX, direct function calls reference the symbol for the function's
    // entry point, which is named by inserting a "." before the function's
    // name.
    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Callee);
    auto &Context = DAG.getMachineFunction().getMMI().getContext();
    MCSymbol *S = Context.getOrCreateSymbol(Twine(".") +
                                            Twine(G->getGlobal()->getName()));
    Callee = DAG.getMCSymbol(S, PtrVT);
    // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
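    // For example (illustrative), a direct call to a function named foo is
    // emitted as "bl .foo" followed by a nop: ".foo" is the entry-point symbol
    // created here, while the undotted "foo" names the function descriptor.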
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             InFlag, dl);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  ImmutableCallSite CS = CLI.CS;

  if (isTailCall) {
    if (Subtarget.useLongCalls() && !(CS && CS.isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall =
        IsEligibleForTailCallOptimization_64SVR4(Callee, CallConv, CS,
                                                 isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      assert(isa<GlobalAddressSDNode>(Callee) &&
             "Callee should be an llvm::Function object.");
      LLVM_DEBUG(
          const GlobalValue *GV =
              cast<GlobalAddressSDNode>(Callee)->getGlobal();
          const unsigned Width =
              80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
          dbgs() << "TCO caller: "
                 << left_justify(DAG.getMachineFunction().getName(), Width)
                 << ", callee linkage: " << GV->getVisibility() << ", "
                 << GV->getLinkage() << "\n");
    }
  }

  if (!isTailCall && CS && CS.isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into an address.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
                            dl, DAG, InVals, CS);

  if (Subtarget.isSVR4ABI())
    return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                            isTailCall, isPatchPoint, Outs, OutVals, Ins,
                            dl, DAG, InVals, CS);

  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
                         isTailCall, isPatchPoint, Outs, OutVals, Ins,
                         dl, DAG, InVals, CS);

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, isPatchPoint, Outs, OutVals, Ins,
                          dl, DAG, InVals, CS);
}
SDValue PPCTargetLowering::LowerCall_32SVR4(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area and the part of the local variable space which
  // contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  PPCCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
                       PtrByteSize);
  if (useSoftFloat())
    CCInfo.PreAnalyzeCallOperands(Outs);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available. Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(nullptr);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }
  CCInfo.clearWasPPCF128();
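
  // To illustrate the split above (types are examples): a <4 x i32> value
  // passed as a fixed argument is assigned to a vector argument register by
  // CC_PPC32_SVR4 while registers remain, but the same value passed through
  // the "..." of a variadic callee goes through CC_PPC32_SVR4_VarArg and is
  // always given a memory location.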

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, MF, ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  // i - Tracks the index into the list of registers allocated for the call
  // RealArgIdx - Tracks the index into the list of actual function arguments
  // j - Tracks the index into the list of byval arguments
  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i, ++RealArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[RealArgIdx];
    ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the callers stack frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                           StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
                                                     SDLoc(MemcpyCall));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    // When useCRBits() is true, there can be i1 arguments.
    // It is because getRegisterType(MVT::i1) => MVT::i1,
    // and for other integer types getRegisterType() => MVT::i32.
    // Extend i1 and ensure callee will get i32.
    if (Arg.getValueType() == MVT::i1)
      Arg = DAG.getNode(Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND,
                        dl, MVT::i32, Arg);

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
        bool IsLE = Subtarget.isLittleEndian();
        SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                                   DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
        SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                           DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
        RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
                                            SVal.getValue(0)));
      } else
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
                             StackPtr, PtrOff);

        MemOpChains.push_back(
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed in
  // registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
                    TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
}
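
// Note on the CR6SET/CR6UNSET nodes emitted above (a sketch; exact mnemonics
// depend on the assembler): for a 32-bit SVR4 vararg call they become
// something like
//   crset 6     # floating-point arguments were passed in FPRs
//   bl   callee
// or "crclr 6" when no FP argument is in a register, which lets a variadic
// callee decide whether it needs to spill the FP argument registers.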
// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
    SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
    SelectionDAG &DAG, const SDLoc &dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
                                                 SDLoc(MemcpyCall));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}
5655 SDValue
PPCTargetLowering::LowerCall_64SVR4(
5656 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
5657 bool isTailCall
, bool isPatchPoint
,
5658 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
5659 const SmallVectorImpl
<SDValue
> &OutVals
,
5660 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5661 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
5662 ImmutableCallSite CS
) const {
5663 bool isELFv2ABI
= Subtarget
.isELFv2ABI();
5664 bool isLittleEndian
= Subtarget
.isLittleEndian();
5665 unsigned NumOps
= Outs
.size();
5666 bool hasNest
= false;
5667 bool IsSibCall
= false;
5669 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5670 unsigned PtrByteSize
= 8;
5672 MachineFunction
&MF
= DAG
.getMachineFunction();
5674 if (isTailCall
&& !getTargetMachine().Options
.GuaranteedTailCallOpt
)
5677 // Mark this function as potentially containing a function that contains a
5678 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5679 // and restoring the callers stack pointer in this functions epilog. This is
5680 // done because by tail calling the called function might overwrite the value
5681 // in this function's (MF) stack pointer stack slot 0(SP).
5682 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5683 CallConv
== CallingConv::Fast
)
5684 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
5686 assert(!(CallConv
== CallingConv::Fast
&& isVarArg
) &&
5687 "fastcc not supported on varargs functions");
5689 // Count how many bytes are to be pushed on the stack, including the linkage
5690 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5691 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5692 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5693 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
5694 unsigned NumBytes
= LinkageSize
;
5695 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
5696 unsigned &QFPR_idx
= FPR_idx
;
5698 static const MCPhysReg GPR
[] = {
5699 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
5700 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
5702 static const MCPhysReg VR
[] = {
5703 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
5704 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
5707 const unsigned NumGPRs
= array_lengthof(GPR
);
5708 const unsigned NumFPRs
= useSoftFloat() ? 0 : 13;
5709 const unsigned NumVRs
= array_lengthof(VR
);
5710 const unsigned NumQFPRs
= NumFPRs
;
5712 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5713 // can be passed to the callee in registers.
5714 // For the fast calling convention, there is another check below.
5715 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5716 bool HasParameterArea
= !isELFv2ABI
|| isVarArg
|| CallConv
== CallingConv::Fast
;
5717 if (!HasParameterArea
) {
5718 unsigned ParamAreaSize
= NumGPRs
* PtrByteSize
;
5719 unsigned AvailableFPRs
= NumFPRs
;
5720 unsigned AvailableVRs
= NumVRs
;
5721 unsigned NumBytesTmp
= NumBytes
;
5722 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5723 if (Outs
[i
].Flags
.isNest()) continue;
5724 if (CalculateStackSlotUsed(Outs
[i
].VT
, Outs
[i
].ArgVT
, Outs
[i
].Flags
,
5725 PtrByteSize
, LinkageSize
, ParamAreaSize
,
5726 NumBytesTmp
, AvailableFPRs
, AvailableVRs
,
5727 Subtarget
.hasQPX()))
5728 HasParameterArea
= true;
5732 // When using the fast calling convention, we don't provide backing for
5733 // arguments that will be in registers.
5734 unsigned NumGPRsUsed
= 0, NumFPRsUsed
= 0, NumVRsUsed
= 0;
5736 // Avoid allocating parameter area for fastcc functions if all the arguments
5737 // can be passed in the registers.
5738 if (CallConv
== CallingConv::Fast
)
5739 HasParameterArea
= false;
5741 // Add up all the space actually used.
5742 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5743 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5744 EVT ArgVT
= Outs
[i
].VT
;
5745 EVT OrigVT
= Outs
[i
].ArgVT
;
5750 if (CallConv
== CallingConv::Fast
) {
5751 if (Flags
.isByVal()) {
5752 NumGPRsUsed
+= (Flags
.getByValSize()+7)/8;
5753 if (NumGPRsUsed
> NumGPRs
)
5754 HasParameterArea
= true;
5756 switch (ArgVT
.getSimpleVT().SimpleTy
) {
5757 default: llvm_unreachable("Unexpected ValueType for argument!");
5761 if (++NumGPRsUsed
<= NumGPRs
)
5771 if (++NumVRsUsed
<= NumVRs
)
5775 // When using QPX, this is handled like a FP register, otherwise, it
5776 // is an Altivec register.
5777 if (Subtarget
.hasQPX()) {
5778 if (++NumFPRsUsed
<= NumFPRs
)
5781 if (++NumVRsUsed
<= NumVRs
)
5787 case MVT::v4f64
: // QPX
5788 case MVT::v4i1
: // QPX
5789 if (++NumFPRsUsed
<= NumFPRs
)
5793 HasParameterArea
= true;
5797 /* Respect alignment of argument on the stack. */
5799 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5800 NumBytes
= ((NumBytes
+ Align
- 1) / Align
) * Align
;
5802 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
5803 if (Flags
.isInConsecutiveRegsLast())
5804 NumBytes
= ((NumBytes
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
5807 unsigned NumBytesActuallyUsed
= NumBytes
;
5809 // In the old ELFv1 ABI,
5810 // the prolog code of the callee may store up to 8 GPR argument registers to
5811 // the stack, allowing va_start to index over them in memory if its varargs.
5812 // Because we cannot tell if this is needed on the caller side, we have to
5813 // conservatively assume that it is needed. As such, make sure we have at
5814 // least enough stack space for the caller to store the 8 GPRs.
5815 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5816 // really requires memory operands, e.g. a vararg function.
5817 if (HasParameterArea
)
5818 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
5820 NumBytes
= LinkageSize
;
5822 // Tail call needs the stack to be aligned.
5823 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5824 CallConv
== CallingConv::Fast
)
5825 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
5829 // Calculate by how many bytes the stack has to be adjusted in case of tail
5830 // call optimization.
5832 SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
5834 // To protect arguments on the stack from being clobbered in a tail call,
5835 // force all the loads to happen before doing any other lowering.
5837 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
5839 // Adjust the stack pointer for the new arguments...
5840 // These operations are automatically eliminated by the prolog/epilog pass
5842 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
5843 SDValue CallSeqStart
= Chain
;
5845 // Load the return address and frame pointer so it can be move somewhere else
5848 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
5850 // Set up a copy of the stack pointer for use loading and storing any
5851 // arguments that may not fit in the registers available for argument
5853 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
5855 // Figure out which arguments are going to go in registers, and which in
5856 // memory. Also, if this is a vararg function, floating point operations
5857 // must be stored to our stack, and loaded into integer regs as well, if
5858 // any integer regs are available for argument passing.
5859 unsigned ArgOffset
= LinkageSize
;
5861 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
5862 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
5864 SmallVector
<SDValue
, 8> MemOpChains
;
5865 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5866 SDValue Arg
= OutVals
[i
];
5867 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5868 EVT ArgVT
= Outs
[i
].VT
;
5869 EVT OrigVT
= Outs
[i
].ArgVT
;
5871 // PtrOff will be used to store the current argument to the stack if a
5872 // register cannot be found for it.
5875 // We re-align the argument offset for each argument, except when using the
5876 // fast calling convention, when we need to make sure we do that only when
5877 // we'll actually use a stack slot.
5878 auto ComputePtrOff
= [&]() {
5879 /* Respect alignment of argument on the stack. */
5881 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5882 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
5884 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
5886 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
5889 if (CallConv
!= CallingConv::Fast
) {
5892 /* Compute GPR index associated with argument offset. */
5893 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
5894 GPR_idx
= std::min(GPR_idx
, NumGPRs
);
5897 // Promote integers to 64-bit values.
5898 if (Arg
.getValueType() == MVT::i32
|| Arg
.getValueType() == MVT::i1
) {
5899 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5900 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
5901 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
5904 // FIXME memcpy is used way more than necessary. Correctness first.
5905 // Note: "by value" is code for passing a structure by value, not
5907 if (Flags
.isByVal()) {
5908 // Note: Size includes alignment padding, so
5909 // struct x { short a; char b; }
5910 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5911 // These are the proper values we need for right-justifying the
5912 // aggregate in a parameter register.
5913 unsigned Size
= Flags
.getByValSize();
5915 // An empty aggregate parameter takes up no storage and no
5920 if (CallConv
== CallingConv::Fast
)
5923 // All aggregates smaller than 8 bytes must be passed right-justified.
5924 if (Size
==1 || Size
==2 || Size
==4) {
5925 EVT VT
= (Size
==1) ? MVT::i8
: ((Size
==2) ? MVT::i16
: MVT::i32
);
5926 if (GPR_idx
!= NumGPRs
) {
5927 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
5928 MachinePointerInfo(), VT
);
5929 MemOpChains
.push_back(Load
.getValue(1));
5930 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
5932 ArgOffset
+= PtrByteSize
;
5937 if (GPR_idx
== NumGPRs
&& Size
< 8) {
5938 SDValue AddPtr
= PtrOff
;
5939 if (!isLittleEndian
) {
5940 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
5941 PtrOff
.getValueType());
5942 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
5944 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
5947 ArgOffset
+= PtrByteSize
;
5950 // Copy entire object into memory. There are cases where gcc-generated
5951 // code assumes it is there, even if it could be put entirely into
5952 // registers. (This is not what the doc says.)
5954 // FIXME: The above statement is likely due to a misunderstanding of the
5955 // documents. All arguments must be copied into the parameter area BY
5956 // THE CALLEE in the event that the callee takes the address of any
5957 // formal argument. That has not yet been implemented. However, it is
5958 // reasonable to use the stack area as a staging area for the register
5961 // Skip this for small aggregates, as we will use the same slot for a
5962 // right-justified copy, below.
5964 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
5968 // When a register is available, pass a small aggregate right-justified.
5969 if (Size
< 8 && GPR_idx
!= NumGPRs
) {
5970 // The easiest way to get this right-justified in a register
5971 // is to copy the structure into the rightmost portion of a
5972 // local variable slot, then load the whole slot into the
5974 // FIXME: The memcpy seems to produce pretty awful code for
5975 // small aggregates, particularly for packed ones.
5976 // FIXME: It would be preferable to use the slot in the
5977 // parameter save area instead of a new local variable.
        SDValue AddPtr = PtrOff;
        if (!isLittleEndian) {
          SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
          AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,

        // Load the slot into the register.
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        RegsToPass.push_back(std::make_pair(PPC::X11, Arg));

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        if (CallConv == CallingConv::Fast)

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        if (CallConv == CallingConv::Fast)
          ArgOffset += PtrByteSize;
      if (CallConv != CallingConv::Fast)
        ArgOffset += PtrByteSize;
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.

      // Named arguments go into FPRs first, and once they overflow, the
      // remaining arguments go into GPRs and then the parameter save area.
      // Unnamed arguments for vararg functions always go to GPRs and
      // then the parameter save area.  For now, put all arguments to vararg
      // routines always in both locations (FPR *and* GPR or stack slot).
      bool NeedGPROrStack = isVarArg || FPR_idx == NumFPRs;
      bool NeededLoad = false;

      // First load the argument into the next available FPR.
      if (FPR_idx != NumFPRs)
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

      // Next, load the argument into GPR or stack slot if needed.
      if (!NeedGPROrStack)
      else if (GPR_idx != NumGPRs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // In the non-vararg case, this can only ever happen in the
        // presence of f32 array types, since otherwise we never run
        // out of FPRs before running out of GPRs.

        // Double values are always passed in a single GPR.
        if (Arg.getValueType() != MVT::f32) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);

        // Non-array float values are extended and passed in a GPR.
        } else if (!Flags.isInConsecutiveRegs()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
        // If we have an array of floats, we collect every odd element
        // together with its predecessor into one GPR.
        } else if (ArgOffset % PtrByteSize != 0) {
          SDValue Lo, Hi;
          Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
          Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          if (!isLittleEndian)
          ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);

        // The final element, if even, goes into the first half of a GPR.
        } else if (Flags.isInConsecutiveRegsLast()) {
          ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
          ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
          if (!isLittleEndian)
            ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));

        // Non-final even elements are skipped; they will be handled
        // together with the subsequent argument on the next go-around.

        if (ArgVal.getNode())
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
        if (CallConv == CallingConv::Fast)

        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32 &&
            !isLittleEndian && !Flags.isInConsecutiveRegs()) {
          SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);

        assert(HasParameterArea &&
               "Parameter area must exist to pass an argument in memory.");
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || NeededLoad) {
        ArgOffset += (Arg.getValueType() == MVT::f32 &&
                      Flags.isInConsecutiveRegs()) ? 4 : 8;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
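      // Hedged example (struct/field names are invented, not from this file):
      // for an ELFv2 homogeneous float aggregate that has spilled from FPRs
      // into GPRs,
      //   struct v2 { float x, y; };
      // the pair (x, y) shares one doubleword: the odd element is merged with
      // its predecessor via the BUILD_PAIR above, each f32 element advances
      // ArgOffset by only 4 bytes, and the final offset is rounded up to a
      // multiple of PtrByteSize (e.g. 4 -> 8, 12 -> 16).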
      if (!Subtarget.hasQPX()) {
        // These can be scalar arguments or elements of a vector array type
        // passed directly.  The latter are used to implement ELFv2 homogenous
        // vector aggregates.

        // For a varargs call, named arguments go into VRs or on the stack as
        // usual; unnamed arguments always go to the stack or the corresponding
        // GPRs when within range.  For now, we always put the value in both
        // locations (or even all three).
          assert(HasParameterArea &&
                 "Parameter area must exist if we have a varargs call.");
          // We could elide this store in the case where the object fits
          // entirely in R registers.  Maybe later.
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);
          if (VR_idx != NumVRs) {
            SDValue Load =
                DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
          for (unsigned i=0; i<16; i+=PtrByteSize) {
            if (GPR_idx == NumGPRs)
            SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                     DAG.getConstant(i, dl, PtrVT));
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        // Non-varargs Altivec params go into VRs or on the stack.
        if (VR_idx != NumVRs) {
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
          if (CallConv == CallingConv::Fast)

          assert(HasParameterArea &&
                 "Parameter area must exist to pass an argument in memory.");
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          if (CallConv == CallingConv::Fast)
        if (CallConv != CallingConv::Fast)
      assert(Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32 &&
             "Invalid QPX parameter type");
      bool IsF32 = Arg.getValueType().getSimpleVT().SimpleTy == MVT::v4f32;
        assert(HasParameterArea &&
               "Parameter area must exist if we have a varargs call.");
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        SDValue Store =
            DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Store);
        if (QFPR_idx != NumQFPRs) {
          SDValue Load = DAG.getLoad(IsF32 ? MVT::v4f32 : MVT::v4f64, dl, Store,
                                     PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Load));
        ArgOffset += (IsF32 ? 16 : 32);
        for (unsigned i = 0; i < (IsF32 ? 16U : 32U); i += PtrByteSize) {
          if (GPR_idx == NumGPRs)
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, dl, PtrVT));
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        // Non-varargs QPX params go into registers or on the stack.
        if (QFPR_idx != NumQFPRs) {
          RegsToPass.push_back(std::make_pair(QFPR[QFPR_idx++], Arg));
          if (CallConv == CallingConv::Fast)

          assert(HasParameterArea &&
                 "Parameter area must exist to pass an argument in memory.");
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           true, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          if (CallConv == CallingConv::Fast)
            ArgOffset += (IsF32 ? 16 : 32);
        if (CallConv != CallingConv::Fast)
          ArgOffset += (IsF32 ? 16 : 32);

  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
         "mismatch in size of parameter area");
  (void)NumBytesActuallyUsed;
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall && !isPatchPoint &&
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    setUsesTOCBasePtr(DAG);
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
    SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(
        Val.getValue(1), dl, Val, AddPtr,
        MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
    // This does not mean the MTCTR instruction must use R12; it's easier
    // to model this as an extra parameter, so do that.
    if (isELFv2ABI && !isPatchPoint)
      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
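  // Rough sketch of the machine-level sequence this sets up for an indirect
  // call under the 64-bit ELF ABIs (illustrative only; the real instructions
  // are produced later during call emission):
  //   std   r2, <TOCSaveOffset>(r1)   ; save the caller's TOC pointer
  //   mr    r12, <callee>             ; ELFv2: callee address in r12
  //   mtctr r12
  //   bctrl
  //   ld    r2, <TOCSaveOffset>(r1)   ; restore the TOC after the call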
  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);

  if (isTailCall && !IsSibCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint, hasNest,
                    DAG, RegsToPass, InFlag, Chain, CallSeqStart, Callee,
                    SPDiff, NumBytes, Ins, InVals, CS);
SDValue PPCTargetLowering::LowerCall_Darwin(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  unsigned NumOps = Outs.size();

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call.  As a consequence, the frame pointer will be used for dynamic
  // allocation and for restoring the caller's stack pointer in this
  // function's epilog.  This is done because the tail-called function might
  // overwrite the value in this function's (MF) stack pointer stack slot
  // 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  unsigned NumBytes = LinkageSize;
  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    ISD::ArgFlagsTy Flags = Outs[i].Flags;
    EVT ArgVT = Outs[i].VT;
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
        ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
        ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
      // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
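  // Worked example of the padding math above (assumed argument list, not
  // from this file): with LinkageSize = 24, two i32 arguments and one v4f32
  // vararg on 32-bit Darwin, NumBytes = 24 + 4 + 4 = 32; rounding to the
  // 16-byte boundary leaves 32, and the vector itself adds 16, giving 48.
  // The std::max below then raises this to LinkageSize + 8 * PtrByteSize = 56.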
  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);

  // Tail call needs the stack to be aligned.
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
    Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
          ArgOffset += PtrByteSize;
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load =
              DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
      if (GPR_idx != NumGPRs) {
        if (Arg.getValueType() == MVT::i1)
          Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, PtrVT, Arg);

        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      ArgOffset += PtrByteSize;
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
          SDValue Store =
              DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load =
                DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo());
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          if (GPR_idx != NumGPRs)
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) // PPC64 has 64-bit GPRs, obviously :)
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      // These go aligned on the stack, or in the corresponding R registers
      // when within range.  The Darwin PPC ABI doc claims they also go in
      // V registers; in fact gcc does this only for arguments that are
      // prototyped, not for those that match the ...  We do it for all
      // arguments, seems to work.
      while (ArgOffset % 16 != 0) {
        ArgOffset += PtrByteSize;
        if (GPR_idx != NumGPRs)
      // We could elide this store in the case where the object fits
      // entirely in R registers.  Maybe later.
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, dl, PtrVT));
      SDValue Store =
          DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
      MemOpChains.push_back(Store);
      if (VR_idx != NumVRs) {
        SDValue Load =
            DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
      for (unsigned i=0; i<16; i+=PtrByteSize) {
        if (GPR_idx == NumGPRs)
        SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                 DAG.getConstant(i, dl, PtrVT));
        SDValue Load =
            DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);

  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
      !isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);

    PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
         "Unimplemented calling convention!");
  if (isVarArg || isPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;
  unsigned NumOps = Outs.size();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, parameter list area.
  // On XCOFF, we start with 24/48, which is reserved space for
  // [SP][CR][LR][2 x reserved][TOC].
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if the callee
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  unsigned NumBytes = LinkageSize + 8 * PtrByteSize;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
  SDValue CallSeqStart = Chain;

  static const MCPhysReg GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10
  static const MCPhysReg GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10

  const unsigned NumGPRs = isPPC64 ? array_lengthof(GPR_64)
                                   : array_lengthof(GPR_32);
  const unsigned NumFPRs = array_lengthof(FPR);
  assert(NumFPRs == 13 && "Only FPR 1-13 could be used for parameter passing "

  const MCPhysReg *GPR = isPPC64 ? GPR_64 : GPR_32;
  unsigned GPR_idx = 0, FPR_idx = 0;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
    report_fatal_error("Handling of tail call is unimplemented!");

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // Promote integers if needed.
    if (Arg.getValueType() == MVT::i1 ||
        (isPPC64 && Arg.getValueType() == MVT::i32)) {
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, PtrVT, Arg);

    // Note: "by value" is code for passing a structure by value, not
    if (Flags.isByVal())
      report_fatal_error("Passing structure by value is unimplemented!");

    switch (Arg.getSimpleValueType().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
      if (GPR_idx != NumGPRs)
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
        report_fatal_error("Handling of placing parameters on the stack is "
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        // If we have any FPRs remaining, we may also have GPRs remaining.
        // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
        if (GPR_idx != NumGPRs)
        if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64)
        report_fatal_error("Handling of placing parameters on the stack is "
      report_fatal_error("Handling of this parameter type is unimplemented!");

  if (!isFunctionGlobalAddress(Callee) &&
      !isa<ExternalSymbolSDNode>(Callee))
    report_fatal_error("Handling of indirect call is unimplemented!");

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  for (auto Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
    InFlag = Chain.getValue(1);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
                    NumBytes, Ins, InVals, CS);
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
                                  MachineFunction &MF, bool isVarArg,
                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
                                  LLVMContext &Context) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
  return CCInfo.CheckReturn(
      Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)

PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
  CCInfo.AnalyzeReturn(Outs,
                       (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)

  SmallVector<SDValue, 4> RetOps(1, Chain);
  // Copy the result values into the output registers.
  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Arg = OutVals[RealResIdx];

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      bool isLittleEndian = Subtarget.isLittleEndian();
      // Legalize ret f64 -> ret 2 x i32.
      SDValue SVal =
          DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                      DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
      RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
      SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
                         DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
      Flag = Chain.getValue(1);
      VA = RVLocs[++i]; // skip ahead to next loc
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
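      // Illustrative note (not from the original source): on SPE targets an
      // f64 return value is split into two i32 halves with PPCISD::EXTRACT_SPE
      // and returned in two consecutive return registers; which half is copied
      // first depends on endianness, as reflected by the swapped
      // getIntPtrConstant(0/1) operands above.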
      Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));

  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
      if (PPC::G8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else if (PPC::F8RCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
      else if (PPC::CRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i1));
      else if (PPC::VRRCRegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::Other));
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");

  RetOps[0] = Chain;  // Update chain.

  // Add the flag if we have it.
    RetOps.push_back(Flag);

  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
                                                SelectionDAG &DAG) const {
  // Get the correct type for integers.
  EVT IntVT = Op.getValueType();

  SDValue Chain = Op.getOperand(0);
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNAREAOFFSET node.
  SDValue Ops[2] = {Chain, FPSIdx};
  SDVTList VTs = DAG.getVTList(IntVT);
  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);

SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
                                             SelectionDAG &DAG) const {
  // When we pop the dynamic allocation we need to restore the SP link.

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  // Construct the stack pointer operand.
  bool isPPC64 = Subtarget.isPPC64();
  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
  SDValue StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDValue Chain = Op.getOperand(0);
  SDValue SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDValue LoadLinkSP =
      DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet.
    // Find out the fixed offset of the return address save area.
    int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
    FI->setReturnAddrSaveIndex(RASI);
  return DAG.getFrameIndex(RASI, PtrVT);

PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    FI->setFramePointerSaveIndex(FPSI);
  return DAG.getFrameIndex(FPSI, PtrVT);
SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Size  = Op.getOperand(1);

  // Get the correct type for pointers.
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
                                DAG.getConstant(0, dl, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
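// Minimal usage sketch (assumed IR, not taken from this file): for
//   %p = alloca i8, i64 %n
// the lowering above computes NegSize = 0 - %n and emits a DYNALLOC node, so
// the stack pointer moves downward by %n bytes while the frame pointer save
// slot (FPSIdx) lets the prologue/epilogue code locate the previous SP.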
SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool isPPC64 = Subtarget.isPPC64();
  EVT PtrVT = getPointerTy(DAG.getDataLayout());

  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
  return DAG.getFrameIndex(FI, PtrVT);
SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
                                               SelectionDAG &DAG) const {
  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
                     DAG.getVTList(MVT::i32, MVT::Other),
                     Op.getOperand(0), Op.getOperand(1));

SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
                                                SelectionDAG &DAG) const {
  return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other,
                     Op.getOperand(0), Op.getOperand(1));

SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getValueType().isVector())
    return LowerVectorLoad(Op, DAG);

  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 loads");
  // First, load 8 bits into 32 bits, then truncate to 1 bit.

  LoadSDNode *LD = cast<LoadSDNode>(Op);

  SDValue Chain = LD->getChain();
  SDValue BasePtr = LD->getBasePtr();
  MachineMemOperand *MMO = LD->getMemOperand();

  SDValue NewLD =
      DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
                     BasePtr, MVT::i8, MMO);
  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);

  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
  return DAG.getMergeValues(Ops, dl);

SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  if (Op.getOperand(1).getValueType().isVector())
    return LowerVectorStore(Op, DAG);

  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
         "Custom lowering only for i1 stores");

  // First, zero extend to 32 bits, then use a truncating store to 8 bits.

  StoreSDNode *ST = cast<StoreSDNode>(Op);

  SDValue Chain = ST->getChain();
  SDValue BasePtr = ST->getBasePtr();
  SDValue Value = ST->getValue();
  MachineMemOperand *MMO = ST->getMemOperand();

  Value = DAG.getNode(ISD::ZERO_EXTEND, dl, getPointerTy(DAG.getDataLayout()),
  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
// FIXME: Remove this once the ANDI glue bug is fixed:
SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 &&
         "Custom lowering only for i1 results");

  return DAG.getNode(PPCISD::ANDIo_1_GT_BIT, DL, MVT::i1,
SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
                                               SelectionDAG &DAG) const {

  // Implements a vector truncate that fits in a vector register as a shuffle.
  // We want to legalize vector truncates down to where the source fits in
  // a vector register (and target is therefore smaller than vector register
  // size).  At that point legalization will try to custom lower the sub-legal
  // result and get here - where we can contain the truncate as a single target
  //
  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
  //   <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
  //
  // We will implement it for big-endian ordering as this (where x denotes
  //   < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
  //   < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
  //
  // The same operation in little-endian ordering will be:
  //   <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
  //   <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>

  assert(Op.getValueType().isVector() && "Vector type expected.");

  SDValue N1 = Op.getOperand(0);
  unsigned SrcSize = N1.getValueType().getSizeInBits();
  assert(SrcSize <= 128 && "Source must fit in an Altivec/VSX vector");
  SDValue WideSrc = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);

  EVT TrgVT = Op.getValueType();
  unsigned TrgNumElts = TrgVT.getVectorNumElements();
  EVT EltVT = TrgVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  // First list the elements we want to keep.
  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
  SmallVector<int, 16> ShuffV;
  if (Subtarget.isLittleEndian())
    for (unsigned i = 0; i < TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult);
    for (unsigned i = 1; i <= TrgNumElts; ++i)
      ShuffV.push_back(i * SizeMult - 1);

  // Populate the remaining elements with undefs.
  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
    // ShuffV.push_back(i + WideNumElts);
    ShuffV.push_back(WideNumElts + 1);

  SDValue Conv = DAG.getNode(ISD::BITCAST, DL, WideVT, WideSrc);
  return DAG.getVectorShuffle(WideVT, DL, Conv, DAG.getUNDEF(WideVT), ShuffV);
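// Worked example of the mask construction above (assumed types, not taken
// from the original comments): truncating v4i32 -> v4i16 gives SizeMult = 2
// and WideNumElts = 8, so
//   little-endian: ShuffV = <0, 2, 4, 6, 9, 9, 9, 9>
//   big-endian:    ShuffV = <1, 3, 5, 7, 9, 9, 9, 9>
// i.e. keep the halfword that holds the truncated value of each word and pad
// the rest of the wide vector with don't-care lanes.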
/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction
/// when possible.
SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  // Not FP? Not a fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())

  // We might be able to do better than this under some circumstances, but in
  // general, fsel-based lowering of select is a finite-math-only optimization.
  // For more information, see section F.3 of the 2.06 ISA specification.
  if (!DAG.getTarget().Options.NoInfsFPMath ||
      !DAG.getTarget().Options.NoNaNsFPMath)
  // TODO: Propagate flags from the select rather than global settings.
  Flags.setNoInfs(true);
  Flags.setNoNaNs(true);

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  EVT ResVT = Op.getValueType();
  EVT CmpVT = Op.getOperand(0).getValueType();
  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDValue TV  = Op.getOperand(2), FV  = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    default: break;       // SETUO etc aren't handled by fsel.
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
        Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                         DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);

  default: break;       // SETUO etc aren't handled by fsel.
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    if (Sel1.getValueType() == MVT::f32)  // Comparison is always 64-bits
      Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT,
                       DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
    Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
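// Illustrative mapping (only valid under the no-NaNs/no-infs checks above;
// operand names are generic):
//   select_cc setge %a, %b, %t, %f   -->   fsel (%a - %b), %t, %f
// because fsel selects its second source operand when the first compares
// greater than or equal to 0.0; the setlt/setle variants are handled by
// swapping %t/%f or the order of the subtraction.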
void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
                                               const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
        Op.getOpcode() == ISD::FP_TO_SINT
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :

  // Convert the FP value to an int value through memory.
  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
                  (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT());
  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI);

  // Emit a store to the stack slot.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineMemOperand *MMO =
        MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4);
    SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr };
    Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
                                    DAG.getVTList(MVT::Other), Ops, MVT::i32,
                                    MMO);
    Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, MPI);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias on big endian.
  if (Op.getValueType() == MVT::i32 && !i32Stack) {
    FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, dl, FIPtr.getValueType()));
    MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
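  // Worked example of the 4-byte bias above (assumed case: i32 result stored
  // without STFIWX): the fctiwz-style conversion leaves the 32-bit integer in
  // the low-order word of the f64 stack temporary, so on big-endian the value
  // lives at byte offset 4 of the 8-byte slot, while on little-endian it is
  // at offset 0.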
/// Custom lowers floating point to integer conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
                                                    const SDLoc &dl) const {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDValue Src = Op.getOperand(0);

  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);

  switch (Op.getSimpleValueType().SimpleTy) {
  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
        Op.getOpcode() == ISD::FP_TO_SINT
            : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ),
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp);
    assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) &&
           "i64 FP_TO_UINT is supported only with FPCVT");
    Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
    Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp);
SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
                                          const SDLoc &dl) const {
  // FP to INT conversions are legal for f128.
  if (EnableQuadPrecision && (Op->getOperand(0).getValueType() == MVT::f128))

  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
  // PPC (the libcall is not available).
  if (Op.getOperand(0).getValueType() == MVT::ppcf128) {
    if (Op.getValueType() == MVT::i32) {
      if (Op.getOpcode() == ISD::FP_TO_SINT) {
        SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(0, dl));
        SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                                 MVT::f64, Op.getOperand(0),
                                 DAG.getIntPtrConstant(1, dl));

        // Add the two halves of the long double in round-to-zero mode.
        SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

        // Now use a smaller FP_TO_SINT.
        return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
      if (Op.getOpcode() == ISD::FP_TO_UINT) {
        const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
        APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
        SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
        // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
        // FIXME: generated code sucks.
        // TODO: Are there fast-math-flags to propagate to this FSUB?
        SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128,
                                   Op.getOperand(0), Tmp);
        True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
        True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
                           DAG.getConstant(0x80000000, dl, MVT::i32));
        SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32,
        return DAG.getSelectCC(dl, Op.getOperand(0), Tmp, True, False,
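        // Worked example of the constant above: 0x41e0000000000000 is the
        // double 2^31 = 2147483648.0.  For X >= 2^31 the result is
        // (int)(X - 2^31) + 0x80000000, otherwise plain (int)X, which is
        // exactly the select_cc on (X, 2^31) built here.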
  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
    return LowerFP_TO_INTDirectMove(Op, DAG, dl);
  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                     RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
// We're trying to insert a regular store, S, and then a load, L.  If the
// incoming value, O, is a load, we might just be able to have our load use the
// address used by O.  However, we don't know if anything else will store to
// that address before we can load from it.  To prevent this situation, we need
// to insert our load, L, into the chain as a peer of O.  To do this, we give L
// the same chain operand as O, we create a token factor from the chain results
// of O and L, and we replace all uses of O's chain result with that token
// factor (see spliceIntoChain below for this last part).
bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
                                            ISD::LoadExtType ET) const {
  if (ET == ISD::NON_EXTLOAD &&
      (Op.getOpcode() == ISD::FP_TO_UINT ||
       Op.getOpcode() == ISD::FP_TO_SINT) &&
      isOperationLegalOrCustom(Op.getOpcode(),
                               Op.getOperand(0).getValueType())) {
    LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);

  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
      LD->isNonTemporal())
  if (LD->getMemoryVT() != MemVT)

  RLI.Ptr = LD->getBasePtr();
  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
    assert(LD->getAddressingMode() == ISD::PRE_INC &&
           "Non-pre-inc AM on PPC?");
    RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,

  RLI.Chain = LD->getChain();
  RLI.MPI = LD->getPointerInfo();
  RLI.IsDereferenceable = LD->isDereferenceable();
  RLI.IsInvariant = LD->isInvariant();
  RLI.Alignment = LD->getAlignment();
  RLI.AAInfo = LD->getAAInfo();
  RLI.Ranges = LD->getRanges();

  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);

// Given the head of the old chain, ResChain, insert a token factor containing
// it and NewResChain, and make users of ResChain now be users of that token
// TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
                                        SDValue NewResChain,
                                        SelectionDAG &DAG) const {
  SDLoc dl(NewResChain);

  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                           NewResChain, DAG.getUNDEF(MVT::Other));
  assert(TF.getNode() != NewResChain.getNode() &&
         "A new TF really is required here");

  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
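// Minimal before/after sketch of the splice above (node names invented):
//   before:  users(O.chain) -> { U1, U2, ... }
//   after:   TF = TokenFactor(O.chain, L.chain); users(O.chain) -> { TF }
// so every consumer that previously ordered against the original load O now
// orders against both O and the newly inserted load L.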
/// Analyze the profitability of a direct move: prefer a float load to an int
/// load plus direct move when there is no integer use of the int load.
bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
  SDNode *Origin = Op.getOperand(0).getNode();
  if (Origin->getOpcode() != ISD::LOAD)

  // If there is no LXSIBZX/LXSIHZX, like on Power8,
  // prefer direct move if the memory size is 1 or 2 bytes.
  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)

  for (SDNode::use_iterator UI = Origin->use_begin(),
                            UE = Origin->use_end();
    // Only look at the users of the loaded value.
    if (UI.getUse().get().getResNo() != 0)

    if (UI->getOpcode() != ISD::SINT_TO_FP &&
        UI->getOpcode() != ISD::UINT_TO_FP)

/// Custom lowers integer to floating point conversions to use
/// the direct move instructions available in ISA 2.07 to avoid the
/// need for load/store combinations.
SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
                                                    const SDLoc &dl) const {
  assert((Op.getValueType() == MVT::f32 ||
          Op.getValueType() == MVT::f64) &&
         "Invalid floating point type as target of conversion");
  assert(Subtarget.hasFPCVT() &&
         "Int to FP conversions with direct moves require FPCVT");

  SDValue Src = Op.getOperand(0);
  bool SinglePrec = Op.getValueType() == MVT::f32;
  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP;
  unsigned ConvOp = Signed ? (SinglePrec ? PPCISD::FCFIDS : PPCISD::FCFID) :
                             (SinglePrec ? PPCISD::FCFIDUS : PPCISD::FCFIDU);
    FP = DAG.getNode(Signed ? PPCISD::MTVSRA : PPCISD::MTVSRZ,
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
    FP = DAG.getNode(PPCISD::MTVSRA, dl, MVT::f64, Src);
    FP = DAG.getNode(ConvOp, dl, SinglePrec ? MVT::f32 : MVT::f64, FP);
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
  EVT VecVT = Vec.getValueType();
  assert(VecVT.isVector() && "Expected a vector type.");
  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");

  EVT EltVT = VecVT.getVectorElementType();
  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);

  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
  SmallVector<SDValue, 16> Ops(NumConcat);
  SDValue UndefVec = DAG.getUNDEF(VecVT);
  for (unsigned i = 1; i < NumConcat; ++i)
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);

SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
                                                const SDLoc &dl) const {

  unsigned Opc = Op.getOpcode();
  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP) &&
         "Unexpected conversion type");
  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
         "Supports conversions to v2f64/v4f32 only.");

  bool SignedConv = Opc == ISD::SINT_TO_FP;
  bool FourEltRes = Op.getValueType() == MVT::v4f32;

  SDValue Wide = widenVec(DAG, Op.getOperand(0), dl);
  EVT WideVT = Wide.getValueType();
  unsigned WideNumElts = WideVT.getVectorNumElements();
  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;

  SmallVector<int, 16> ShuffV;
  for (unsigned i = 0; i < WideNumElts; ++i)
    ShuffV.push_back(i + WideNumElts);

  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
  int SaveElts = FourEltRes ? 4 : 2;
  if (Subtarget.isLittleEndian())
    for (int i = 0; i < SaveElts; i++)
      ShuffV[i * Stride] = i;
  else
    for (int i = 1; i <= SaveElts; i++)
      ShuffV[i * Stride - 1] = i - 1;

  SDValue ShuffleSrc2 =
      SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
  unsigned ExtendOp =
      SignedConv ? (unsigned)PPCISD::SExtVElems : (unsigned)ISD::BITCAST;

  SDValue Extend;
  if (!Subtarget.hasP9Altivec() && SignedConv) {
    Arrange = DAG.getBitcast(IntermediateVT, Arrange);
    Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
                         DAG.getValueType(Op.getOperand(0).getValueType()));
  } else
    Extend = DAG.getNode(ExtendOp, dl, IntermediateVT, Arrange);

  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
}
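
// Illustrative example for the shuffle set up above: converting v4i16 to
// v4f32 on a little-endian target widens the source to v8i16 and builds the
// mask <0, 9, 1, 11, 2, 13, 3, 15>, pairing each source halfword with a
// halfword taken from the zero vector (unsigned) or from undef (signed), so
// that each 32-bit lane of the intermediate v4i32 holds one source element.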

SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);

  EVT InVT = Op.getOperand(0).getValueType();
  EVT OutVT = Op.getValueType();
  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
      isOperationCustom(Op.getOpcode(), InVT))
    return LowerINT_TO_FPVector(Op, DAG, dl);

  // Conversions to f128 are legal.
  if (EnableQuadPrecision && (Op.getValueType() == MVT::f128))
    return Op;

  if (Subtarget.hasQPX() && Op.getOperand(0).getValueType() == MVT::v4i1) {
    if (Op.getValueType() != MVT::v4f32 && Op.getValueType() != MVT::v4f64)
      return SDValue();

    SDValue Value = Op.getOperand(0);
    // The values are now known to be -1 (false) or 1 (true). To convert this
    // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
    // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
    Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

    SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

    Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

    if (Op.getValueType() != MVT::v4f64)
      Value = DAG.getNode(ISD::FP_ROUND, dl,
                          Op.getValueType(), Value,
                          DAG.getIntPtrConstant(1, dl));
    return Value;
  }

  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();

  if (Op.getOperand(0).getValueType() == MVT::i1)
    return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Op.getOperand(0),
                       DAG.getConstantFP(1.0, dl, Op.getValueType()),
                       DAG.getConstantFP(0.0, dl, Op.getValueType()));

  // If we have direct moves, we can do all the conversion, skip the store/load
  // however, without FPCVT we can't do most conversions.
  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
      Subtarget.isPPC64() && Subtarget.hasFPCVT())
    return LowerINT_TO_FPDirectMove(Op, DAG, dl);

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDValue SINT = Op.getOperand(0);
    // When converting to single-precision, we actually need to convert
    // to double-precision first and then round to single-precision.
    // To avoid double-rounding effects during that operation, we have
    // to prepare the input operand.  Bits that might be truncated when
    // converting to double-precision are replaced by a bit that won't
    // be lost at this stage, but is below the single-precision rounding
    // position.
    //
    // However, if -enable-unsafe-fp-math is in effect, accept double
    // rounding to avoid the extra overhead.
    if (Op.getValueType() == MVT::f32 &&
        !Subtarget.hasFPCVT() &&
        !DAG.getTarget().Options.UnsafeFPMath) {

      // Twiddle input to make sure the low 11 bits are zero.  (If this
      // is the case, we are guaranteed the value will fit into the 53 bit
      // mantissa of an IEEE double-precision value without rounding.)
      // If any of those low 11 bits were not zero originally, make sure
      // bit 12 (value 2048) is set instead, so that the final rounding
      // to single-precision gets the correct result.
      SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                                  SINT, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
                          Round, DAG.getConstant(2047, dl, MVT::i64));
      Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
      Round = DAG.getNode(ISD::AND, dl, MVT::i64,
                          Round, DAG.getConstant(-2048, dl, MVT::i64));

      // However, we cannot use that value unconditionally: if the magnitude
      // of the input value is small, the bit-twiddling we did above might
      // end up visibly changing the output.  Fortunately, in that case, we
      // don't need to twiddle bits since the original input will convert
      // exactly to double-precision floating-point already.  Therefore,
      // construct a conditional to use the original value if the top 11
      // bits are all sign-bit copies, and use the rounded value computed
      // above otherwise.
      SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
                                 SINT, DAG.getConstant(53, dl, MVT::i32));
      Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
                         Cond, DAG.getConstant(1, dl, MVT::i64));
      Cond = DAG.getSetCC(dl, MVT::i32,
                          Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);

      SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
    }
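
    // Illustrative numeric check of the twiddling above: for SINT = 0x1001
    // the AND/ADD/OR/AND sequence yields 0x1800, i.e. the low 11 bits are
    // cleared and, because they were not all zero, bit 11 (value 2048) is
    // set, so the later round to single precision still rounds in the right
    // direction; inputs that already fit exactly keep SINT via the SELECT.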

    ReuseLoadInfo RLI;
    SDValue Bits;

    MachineFunction &MF = DAG.getMachineFunction();
    if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
      Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
                         RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasLFIWAX() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWAX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (Subtarget.hasFPCVT() &&
               canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(PPCISD::LFIWZX, dl,
                                     DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
      spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
    } else if (((Subtarget.hasLFIWAX() &&
                 SINT.getOpcode() == ISD::SIGN_EXTEND) ||
                (Subtarget.hasFPCVT() &&
                 SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
               SINT.getOperand(0).getValueType() == MVT::i32) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      EVT PtrVT = getPointerTy(DAG.getDataLayout());

      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, SINT.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;

      MachineMemOperand *MMO =
        MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                                RLI.Alignment, RLI.AAInfo, RLI.Ranges);
      SDValue Ops[] = { RLI.Chain, RLI.Ptr };
      Bits = DAG.getMemIntrinsicNode(SINT.getOpcode() == ISD::ZERO_EXTEND ?
                                     PPCISD::LFIWZX : PPCISD::LFIWAX,
                                     dl, DAG.getVTList(MVT::f64, MVT::Other),
                                     Ops, MVT::i32, MMO);
    } else
      Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);

    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled INT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  SDValue Ld;
  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
    ReuseLoadInfo RLI;
    bool ReusingLoad;
    if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
                                            DAG))) {
      int FrameIdx = MFI.CreateStackObject(4, 4, false);
      SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

      SDValue Store =
          DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(), FrameIdx));

      assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
             "Expected an i32 store");

      RLI.Ptr = FIdx;
      RLI.Chain = Store;
      RLI.MPI =
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
      RLI.Alignment = 4;
    }

    MachineMemOperand *MMO =
      MF.getMachineMemOperand(RLI.MPI, MachineMemOperand::MOLoad, 4,
                              RLI.Alignment, RLI.AAInfo, RLI.Ranges);
    SDValue Ops[] = { RLI.Chain, RLI.Ptr };
    Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ?
                                 PPCISD::LFIWZX : PPCISD::LFIWAX,
                                 dl, DAG.getVTList(MVT::f64, MVT::Other),
                                 Ops, MVT::i32, MMO);
    if (ReusingLoad)
      spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
  } else {
    assert(Subtarget.isPPC64() &&
           "i32->FP without LFIWAX supported only on PPC64");

    int FrameIdx = MFI.CreateStackObject(8, 8, false);
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
                                Op.getOperand(0));

    // STD the extended value into the stack slot.
    SDValue Store = DAG.getStore(
        DAG.getEntryNode(), dl, Ext64, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));

    // Load the value as a double.
    Ld = DAG.getLoad(
        MVT::f64, dl, Store, FIdx,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx));
  }

  // FCFID it and return it.
  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld);
  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT())
    FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
                     DAG.getIntPtrConstant(0, dl));
  return FP;
}

SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

   To perform the conversion, we do:
     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */
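
  // Worked through the formula above (illustrative): an RN field of 0b00
  // (round to nearest) gives 0 ^ ((~0 & 3) >> 1) = 1, 0b01 (round to zero)
  // gives 1 ^ ((~1 & 3) >> 1) = 0, 0b10 gives 2 and 0b11 gives 3, matching
  // the FLT_ROUNDS encoding listed above.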

  SDLoc dl(Op);
  MachineFunction &MF = DAG.getMachineFunction();
  EVT VT = Op.getValueType();
  EVT PtrVT = getPointerTy(MF.getDataLayout());

  // Save FP Control Word to register
  EVT NodeTys[] = {
    MVT::f64,    // return register
    MVT::Glue    // unused in this context
  };
  SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
                               MachinePointerInfo());

  // Load FP Control Word from low 32 bits of stack slot.
  SDValue Four = DAG.getConstant(4, dl, PtrVT);
  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
  SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo());

  // Transform as necessary
  SDValue CWD1 =
    DAG.getNode(ISD::AND, dl, MVT::i32,
                CWD, DAG.getConstant(3, dl, MVT::i32));
  SDValue CWD2 =
    DAG.getNode(ISD::SRL, dl, MVT::i32,
                DAG.getNode(ISD::AND, dl, MVT::i32,
                            DAG.getNode(ISD::XOR, dl, MVT::i32,
                                        CWD, DAG.getConstant(3, dl, MVT::i32)),
                            DAG.getConstant(3, dl, MVT::i32)),
                DAG.getConstant(1, dl, MVT::i32));

  SDValue RetVal =
    DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal);
}

SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  SDLoc dl(Op);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}
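
// Illustrative sketch of the SHL_PARTS expansion above: with the PPC shift
// nodes defined to produce 0 for shift amounts >= BitWidth, it computes
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
//   OutLo =  Lo << Amt
// At most one of the two Lo terms is nonzero, so both Amt < BitWidth and
// Amt >= BitWidth are covered without a branch.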

SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);
  EVT AmtVT = Amt.getValueType();

  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
                             DAG.getConstant(BitWidth, dl, AmtVT), Amt);
  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
                             DAG.getConstant(-BitWidth, dl, AmtVT));
  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
                                  Tmp4, Tmp6, ISD::SETLE);
  SDValue OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, dl);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  EVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
}
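
// For example (illustrative), BuildSplatI(-5, 2, MVT::v8i16, DAG, dl) builds
// a v8i16 constant splat of -5, which is then typically selected as a single
// vspltish, while BuildSplatI(-1, 4, ...) is canonicalized to a byte splat so
// that all-ones vectors always match vspltisb -1.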

/// BuildIntrinsicOp - Return a unary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
                                const SDLoc &dl, EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS,
                                SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
                                SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
                                EVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
                     DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
}

/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
                           SelectionDAG &DAG, const SDLoc &dl) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);

  int Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = i + Amt;
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}

/// Do we have an efficient pattern in a .td file for this node?
///
/// \param V - pointer to the BuildVectorSDNode being matched
/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
///
/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
/// the opposite is true (expansion is beneficial) are:
/// - The node builds a vector out of integers that are not 32 or 64-bits
/// - The node builds a vector out of constants
/// - The node is a "load-and-splat"
/// In all other cases, we will choose to keep the BUILD_VECTOR.
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
                                            bool HasDirectMove,
                                            bool HasP8Vector) {
  EVT VecVT = V->getValueType(0);
  bool RightType = VecVT == MVT::v2f64 ||
    (HasP8Vector && VecVT == MVT::v4f32) ||
    (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
  if (!RightType)
    return false;

  bool IsSplat = true;
  bool IsLoad = false;
  SDValue Op0 = V->getOperand(0);

  // This function is called in a block that confirms the node is not a constant
  // splat. So a constant BUILD_VECTOR here means the vector is built out of
  // different constants.
  if (V->isConstant())
    return false;
  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
    if (V->getOperand(i).isUndef())
      return false;
    // We want to expand nodes that represent load-and-splat even if the
    // loaded value is a floating point truncation or conversion to int.
    if (V->getOperand(i).getOpcode() == ISD::LOAD ||
        (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
        (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
         V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
      IsLoad = true;
    // If the operands are different or the input is not a load and has more
    // uses than just this BV node, then it isn't a splat.
    if (V->getOperand(i) != Op0 ||
        (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
      IsSplat = false;
  }
  return !(IsSplat && IsLoad);
}

// Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {

  SDLoc dl(Op);
  SDValue Op0 = Op->getOperand(0);

  if (!EnableQuadPrecision ||
      (Op.getValueType() != MVT::f128) ||
      (Op0.getOpcode() != ISD::BUILD_PAIR) ||
      (Op0.getOperand(0).getValueType() != MVT::i64) ||
      (Op0.getOperand(1).getValueType() != MVT::i64))
    return SDValue();

  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
                     Op0.getOperand(1));
}

static const SDValue *getNormalLoadInput(const SDValue &Op) {
  const SDValue *InputLoad = &Op;
  if (InputLoad->getOpcode() == ISD::BITCAST)
    InputLoad = &InputLoad->getOperand(0);
  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR)
    InputLoad = &InputLoad->getOperand(0);
  if (InputLoad->getOpcode() != ISD::LOAD)
    return nullptr;
  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
                                             SelectionDAG &DAG) const {
  SDLoc dl(Op);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");

  if (Subtarget.hasQPX() && Op.getValueType() == MVT::v4i1) {
    // We first build an i32 vector, load it into a QPX register,
    // then convert it to a floating-point vector and compare it
    // to a zero vector to get the boolean result.
    MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FrameIdx = MFI.CreateStackObject(16, 16, false);
    MachinePointerInfo PtrInfo =
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

    assert(BVN->getNumOperands() == 4 &&
           "BUILD_VECTOR for v4i1 does not have 4 operands");

    bool IsConst = true;
    for (unsigned i = 0; i < 4; ++i) {
      if (BVN->getOperand(i).isUndef()) continue;
      if (!isa<ConstantSDNode>(BVN->getOperand(i))) {
        IsConst = false;
        break;
      }
    }

    if (IsConst) {
      Constant *One =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), 1.0);
      Constant *NegOne =
          ConstantFP::get(Type::getFloatTy(*DAG.getContext()), -1.0);

      Constant *CV[4];
      for (unsigned i = 0; i < 4; ++i) {
        if (BVN->getOperand(i).isUndef())
          CV[i] = UndefValue::get(Type::getFloatTy(*DAG.getContext()));
        else if (isNullConstant(BVN->getOperand(i)))
          CV[i] = NegOne;
        else
          CV[i] = One;
      }

      Constant *CP = ConstantVector::get(CV);
      SDValue CPIdx = DAG.getConstantPool(CP, getPointerTy(DAG.getDataLayout()),
                                          16 /* alignment */);

      SDValue Ops[] = {DAG.getEntryNode(), CPIdx};
      SDVTList VTs = DAG.getVTList({MVT::v4i1, /*chain*/ MVT::Other});
      return DAG.getMemIntrinsicNode(
          PPCISD::QVLFSb, dl, VTs, Ops, MVT::v4f32,
          MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
    }
<SDValue
, 4> Stores
;
8264 for (unsigned i
= 0; i
< 4; ++i
) {
8265 if (BVN
->getOperand(i
).isUndef()) continue;
8267 unsigned Offset
= 4*i
;
8268 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
8269 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
8271 unsigned StoreSize
= BVN
->getOperand(i
).getValueType().getStoreSize();
8272 if (StoreSize
> 4) {
8274 DAG
.getTruncStore(DAG
.getEntryNode(), dl
, BVN
->getOperand(i
), Idx
,
8275 PtrInfo
.getWithOffset(Offset
), MVT::i32
));
8277 SDValue StoreValue
= BVN
->getOperand(i
);
8279 StoreValue
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, StoreValue
);
8281 Stores
.push_back(DAG
.getStore(DAG
.getEntryNode(), dl
, StoreValue
, Idx
,
8282 PtrInfo
.getWithOffset(Offset
)));
8287 if (!Stores
.empty())
8288 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
8290 StoreChain
= DAG
.getEntryNode();
8292 // Now load from v4i32 into the QPX register; this will extend it to
8293 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8294 // is typed as v4f64 because the QPX register integer states are not
8295 // explicitly represented.
8297 SDValue Ops
[] = {StoreChain
,
8298 DAG
.getConstant(Intrinsic::ppc_qpx_qvlfiwz
, dl
, MVT::i32
),
8300 SDVTList VTs
= DAG
.getVTList({MVT::v4f64
, /*chain*/ MVT::Other
});
8302 SDValue LoadedVect
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
,
8303 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
8304 LoadedVect
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
8305 DAG
.getConstant(Intrinsic::ppc_qpx_qvfcfidu
, dl
, MVT::i32
),
8308 SDValue FPZeros
= DAG
.getConstantFP(0.0, dl
, MVT::v4f64
);
8310 return DAG
.getSetCC(dl
, MVT::v4i1
, LoadedVect
, FPZeros
, ISD::SETEQ
);

  // All other QPX vectors are handled by generic code.
  if (Subtarget.hasQPX())
    return SDValue();

  // Check if this is a splat of a constant value.
  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                             HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
      SplatBitSize > 32) {

    const SDValue *InputLoad = getNormalLoadInput(Op.getOperand(0));
    // Handle load-and-splat patterns as we have instructions that will do this
    // in one go.
    if (InputLoad && DAG.isSplatValue(Op, true)) {
      LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);

      // We have handling for 4 and 8 byte elements.
      unsigned ElementSize = LD->getMemoryVT().getScalarSizeInBits();

      // Checking for a single use of this load, we have to check for vector
      // width (128 bits) / ElementSize uses (since each operand of the
      // BUILD_VECTOR is a separate use of the value.
      if (InputLoad->getNode()->hasNUsesOfValue(128 / ElementSize, 0) &&
          ((Subtarget.hasVSX() && ElementSize == 64) ||
           (Subtarget.hasP9Vector() && ElementSize == 32))) {
        SDValue Ops[] = {
          LD->getChain(),    // Chain
          LD->getBasePtr(),  // Ptr
          DAG.getValueType(Op.getValueType()) // VT
        };
        return
          DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl,
                                  DAG.getVTList(Op.getValueType(), MVT::Other),
                                  Ops, LD->getMemoryVT(), LD->getMemOperand());
      }
    }

    // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
    // lowered to VSX instructions under certain conditions.
    // Without VSX, there is no pattern more efficient than expanding the node.
    if (Subtarget.hasVSX() &&
        haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                        Subtarget.hasP8Vector()))
      return Op;
    return SDValue();
  }

  unsigned SplatBits = APSplatBits.getZExtValue();
  unsigned SplatUndef = APSplatUndef.getZExtValue();
  unsigned SplatSize = SplatBitSize / 8;

  // First, handle single instruction cases.

  // All zeros?
  if (SplatBits == 0) {
    // Canonicalize all zero vectors to be v4i32.
    if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
      SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
      Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
    }
    return Op;
  }

  // We have XXSPLTIB for constant splats one byte wide
  if (Subtarget.hasP9Vector() && SplatSize == 1) {
    // This is a splat of 1-byte elements with some elements potentially undef.
    // Rather than trying to match undef in the SDAG patterns, ensure that all
    // elements are the same constant.
    if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
      SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
                                                       dl, MVT::i32));
      SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
      if (Op.getValueType() != MVT::v16i8)
        return DAG.getBitcast(Op.getValueType(), NewBV);
      return NewBV;
    }

    // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
    // detect that constant splats like v8i16: 0xABAB are really just splats
    // of a 1-byte constant. In this case, we need to convert the node to a
    // splat of v16i8 and a bitcast.
    if (Op.getValueType() != MVT::v16i8)
      return DAG.getBitcast(Op.getValueType(),
                            DAG.getConstant(SplatBits, dl, MVT::v16i8));
    return Op;
  }

  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
  int32_t SextVal = (int32_t(SplatBits << (32-SplatBitSize)) >>
                     (32-SplatBitSize));
  if (SextVal >= -16 && SextVal <= 15)
    return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl);

  // Two instruction sequences.

  // If this value is in the range [-32,30] and is even, use:
  //     VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
  // If this value is in the range [17,31] and is odd, use:
  //     VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
  // If this value is in the range [-31,-17] and is odd, use:
  //     VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
  // Note the last two are three-instruction sequences.
  if (SextVal >= -32 && SextVal <= 31) {
    // To avoid having these optimizations undone by constant folding,
    // we convert to a pseudo that will be expanded later into one of
    // the above forms.
    SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
    EVT VT = (SplatSize == 1 ? MVT::v16i8 :
              (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
    SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
    SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
    if (VT == Op.getValueType())
      return RetVal;
    else
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
  }
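
  // Illustrative examples of the VADD_SPLAT pseudo above: a v4i32 splat of 24
  // can be produced as vspltisw 12 followed by vadduwm (12 + 12 = 24), and a
  // splat of 27 as vspltisw 11 followed by a subtract of vspltisw -16
  // (11 - (-16) = 27), matching the sequences listed in the comment.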

  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
  // for fneg/fabs.
  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
    // Make -1 and vspltisw -1:
    SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl);

    // Make the VSLW intrinsic, computing 0x8000_0000.
    SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                   OnesV, DAG, dl);

    // xor by OnesV to invert it.
    Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
  }

  // Check to see if this is a wide variety of vsplti*, binop self cases.
  static const signed char SplatCsts[] = {
    -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
    -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
  };

  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
    // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
    // cases which are ambiguous (e.g. formation of 0x8000_0000).
    int i = SplatCsts[idx];

    // Figure out what shift amount will be used by altivec if shifted by i in
    // this splat size.
    unsigned TypeShiftAmt = i & (SplatBitSize-1);

    // vsplti + shl self.
    if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
        Intrinsic::ppc_altivec_vslw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + srl self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
        Intrinsic::ppc_altivec_vsrw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + sra self.
    if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
        Intrinsic::ppc_altivec_vsraw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // vsplti + rol self.
    if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                         ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
      SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl);
      static const unsigned IIDs[] = { // Intrinsic to use for each size.
        Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
        Intrinsic::ppc_altivec_vrlw
      };
      Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
    }

    // t = vsplti c, result = vsldoi t, t, 1
    if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 2
    if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
    // t = vsplti c, result = vsldoi t, t, 3
    if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
      SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl);
      unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
      return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
    }
  }

  return SDValue();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
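
// Sketch of the PerfectShuffleTable entry layout as decoded above: bits 30-31
// hold the cost of the sequence (consulted by the caller below), bits 26-29
// the operation (one of the OP_* values), bits 13-25 the table index of the
// left operand and bits 0-12 that of the right operand; OP_COPY entries use
// index (1*9+2)*9+3 to mean "take LHS as-is" and ((4*9+5)*9+6)*9+7 for RHS.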

/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8,  7,  6,  5,  4,  3,  2,  1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1,  2,  3,  4,  5,  6,  7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa.  Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be
        // swapped if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}

/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14.  Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.  Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
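
// Note on the nibble packing above (illustrative): each even mask element is
// divided by 2 and stored as a 4-bit nibble, most significant nibble first,
// so the identity half-word order 0,1,2,3,4,5,6,7 packs to 0x01234567
// (exactly OriginalOrderLow) and 8..15 packs to 0x89ABCDEF (OriginalOrderHigh).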

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
  EVT VT = Op.getValueType();
  bool isLittleEndian = Subtarget.isLittleEndian();

  unsigned ShiftElts, InsertAtByte;
  bool Swap = false;

  // If this is a load-and-splat, we can do that with a single instruction
  // in some cases. However if the load has multiple uses, we don't want to
  // combine it because that will just produce multiple loads.
  const SDValue *InputLoad = getNormalLoadInput(V1);
  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
      (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
      InputLoad->hasOneUse()) {
    bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
    int SplatIdx =
      PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);

    LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
    // For 4-byte load-and-splat, we need Power9.
    if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
      uint64_t Offset = 0;
      if (IsFourByte)
        Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
      else
        Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
      SDValue BasePtr = LD->getBasePtr();
      if (Offset != 0)
        BasePtr = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                              BasePtr, DAG.getIntPtrConstant(Offset, dl));
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        BasePtr,           // BasePtr
        DAG.getValueType(Op.getValueType()) // VT
      };
      SDVTList VTL =
        DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
      SDValue LdSplt =
        DAG.getMemIntrinsicNode(PPCISD::LD_SPLAT, dl, VTL,
                                Ops, LD->getMemoryVT(), LD->getMemOperand());
      if (LdSplt.getValueType() != SVOp->getValueType(0))
        LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
      return LdSplt;
    }
  }

  if (Subtarget.hasP9Vector() &&
      PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
                           isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
    if (ShiftElts) {
      SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
                                DAG.getConstant(ShiftElts, dl, MVT::i32));
      SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
                                DAG.getConstant(InsertAtByte, dl, MVT::i32));
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
    }
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }

  if (Subtarget.hasP9Altivec()) {
    SDValue NewISDNode;
    if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
      return NewISDNode;

    if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
      return NewISDNode;
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
    SDValue Conv2 =
      DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);

    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
  }

  if (Subtarget.hasVSX() &&
      PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
    if (Swap)
      std::swap(V1, V2);
    SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
    SDValue Conv2 =
      DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);

    SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
                                 DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
  }

  if (Subtarget.hasP9Vector()) {
    if (PPC::isXXBRHShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
      SDValue ReveHWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v8i16, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
    } else if (PPC::isXXBRWShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
      SDValue ReveWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v4i32, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
    } else if (PPC::isXXBRDShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
      SDValue ReveDWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
    } else if (PPC::isXXBRQShuffleMask(SVOp)) {
      SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
      SDValue ReveQWord = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v1i128, Conv);
      return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
    }
  }
.hasVSX()) {
8951 if (V2
.isUndef() && PPC::isSplatShuffleMask(SVOp
, 4)) {
8952 int SplatIdx
= PPC::getSplatIdxForPPCMnemonics(SVOp
, 4, DAG
);
8954 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8955 SDValue Splat
= DAG
.getNode(PPCISD::XXSPLT
, dl
, MVT::v4i32
, Conv
,
8956 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8957 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Splat
);
8960 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8961 if (V2
.isUndef() && PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) == 8) {
8962 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2f64
, V1
);
8963 SDValue Swap
= DAG
.getNode(PPCISD::SWAP_NO_CHAIN
, dl
, MVT::v2f64
, Conv
);
8964 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Swap
);
8968 if (Subtarget
.hasQPX()) {
8969 if (VT
.getVectorNumElements() != 4)
8972 if (V2
.isUndef()) V2
= V1
;
8974 int AlignIdx
= PPC::isQVALIGNIShuffleMask(SVOp
);
8975 if (AlignIdx
!= -1) {
8976 return DAG
.getNode(PPCISD::QVALIGNI
, dl
, VT
, V1
, V2
,
8977 DAG
.getConstant(AlignIdx
, dl
, MVT::i32
));
8978 } else if (SVOp
->isSplat()) {
8979 int SplatIdx
= SVOp
->getSplatIndex();
8980 if (SplatIdx
>= 4) {
8985 return DAG
.getNode(PPCISD::QVESPLATI
, dl
, VT
, V1
,
8986 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8989 // Lower this into a qvgpci/qvfperm pair.
8991 // Compute the qvgpci literal
8993 for (unsigned i
= 0; i
< 4; ++i
) {
8994 int m
= SVOp
->getMaskElt(i
);
8995 unsigned mm
= m
>= 0 ? (unsigned) m
: i
;
8996 idx
|= mm
<< (3-i
)*3;
8999 SDValue V3
= DAG
.getNode(PPCISD::QVGPCI
, dl
, MVT::v4f64
,
9000 DAG
.getConstant(idx
, dl
, MVT::i32
));
9001 return DAG
.getNode(PPCISD::QVFPERM
, dl
, VT
, V1
, V2
, V3
);
9004 // Cases that are handled by instructions that take permute immediates
9005 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
9006 // selected by the instruction selector.
9008 if (PPC::isSplatShuffleMask(SVOp
, 1) ||
9009 PPC::isSplatShuffleMask(SVOp
, 2) ||
9010 PPC::isSplatShuffleMask(SVOp
, 4) ||
9011 PPC::isVPKUWUMShuffleMask(SVOp
, 1, DAG
) ||
9012 PPC::isVPKUHUMShuffleMask(SVOp
, 1, DAG
) ||
9013 PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) != -1 ||
9014 PPC::isVMRGLShuffleMask(SVOp
, 1, 1, DAG
) ||
9015 PPC::isVMRGLShuffleMask(SVOp
, 2, 1, DAG
) ||
9016 PPC::isVMRGLShuffleMask(SVOp
, 4, 1, DAG
) ||
9017 PPC::isVMRGHShuffleMask(SVOp
, 1, 1, DAG
) ||
9018 PPC::isVMRGHShuffleMask(SVOp
, 2, 1, DAG
) ||
9019 PPC::isVMRGHShuffleMask(SVOp
, 4, 1, DAG
) ||
9020 (Subtarget
.hasP8Altivec() && (
9021 PPC::isVPKUDUMShuffleMask(SVOp
, 1, DAG
) ||
9022 PPC::isVMRGEOShuffleMask(SVOp
, true, 1, DAG
) ||
9023 PPC::isVMRGEOShuffleMask(SVOp
, false, 1, DAG
)))) {
9028 // Altivec has a variety of "shuffle immediates" that take two vector inputs
9029 // and produce a fixed permutation. If any of these match, do not lower to
9031 unsigned int ShuffleKind
= isLittleEndian
? 2 : 0;
9032 if (PPC::isVPKUWUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
9033 PPC::isVPKUHUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
9034 PPC::isVSLDOIShuffleMask(SVOp
, ShuffleKind
, DAG
) != -1 ||
9035 PPC::isVMRGLShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
9036 PPC::isVMRGLShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
9037 PPC::isVMRGLShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
9038 PPC::isVMRGHShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
9039 PPC::isVMRGHShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
9040 PPC::isVMRGHShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
9041 (Subtarget
.hasP8Altivec() && (
9042 PPC::isVPKUDUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
9043 PPC::isVMRGEOShuffleMask(SVOp
, true, ShuffleKind
, DAG
) ||
9044 PPC::isVMRGEOShuffleMask(SVOp
, false, ShuffleKind
, DAG
))))
9047 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
9048 // perfect shuffle table to emit an optimal matching sequence.
9049 ArrayRef
<int> PermMask
= SVOp
->getMask();
9051 unsigned PFIndexes
[4];
9052 bool isFourElementShuffle
= true;
9053 for (unsigned i
= 0; i
!= 4 && isFourElementShuffle
; ++i
) { // Element number
9054 unsigned EltNo
= 8; // Start out undef.
9055 for (unsigned j
= 0; j
!= 4; ++j
) { // Intra-element byte.
9056 if (PermMask
[i
*4+j
] < 0)
9057 continue; // Undef, ignore it.
9059 unsigned ByteSource
= PermMask
[i
*4+j
];
9060 if ((ByteSource
& 3) != j
) {
9061 isFourElementShuffle
= false;
9066 EltNo
= ByteSource
/4;
9067 } else if (EltNo
!= ByteSource
/4) {
9068 isFourElementShuffle
= false;
9072 PFIndexes
[i
] = EltNo
;
9075 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
9076 // perfect shuffle vector to determine if it is cost effective to do this as
9077 // discrete instructions, or whether we should use a vperm.
9078 // For now, we skip this for little endian until such time as we have a
9079 // little-endian perfect shuffle table.
9080 if (isFourElementShuffle
&& !isLittleEndian
) {
9081 // Compute the index in the perfect shuffle table.
9082 unsigned PFTableIndex
=
9083 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
9085 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
9086 unsigned Cost
= (PFEntry
>> 30);
9088 // Determining when to avoid vperm is tricky. Many things affect the cost
9089 // of vperm, particularly how many times the perm mask needs to be computed.
9090 // For example, if the perm mask can be hoisted out of a loop or is already
9091 // used (perhaps because there are multiple permutes with the same shuffle
9092 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9093 // the loop requires an extra register.
9095 // As a compromise, we only emit discrete instructions if the shuffle can be
9096 // generated in 3 or fewer operations. When we have loop information
9097 // available, if this block is within a loop, we should avoid using vperm
9098 // for 3-operation perms and use a constant pool load instead.
9100 return GeneratePerfectShuffle(PFEntry
, V1
, V2
, DAG
, dl
);
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.isUndef()) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.

  // For little endian, the order of the input vectors is reversed, and
  // the permutation mask is complemented with respect to 31.  This is
  // necessary to produce proper semantics with the big-endian-biased vperm
  // instruction.
  EVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDValue, 16> ResultMask;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
    unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];

    for (unsigned j = 0; j != BytesPerElement; ++j)
      if (isLittleEndian)
        ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
                                             dl, MVT::i32));
      else
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
                                             MVT::i32));
  }

  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
  if (isLittleEndian)
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V2, V1, VPermMask);
  else
    return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
                       V1, V2, VPermMask);
}
/// getVectorCompareInfo - Given an intrinsic, return false if it is not a
/// vector comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
                                 bool &isDot, const PPCSubtarget &Subtarget) {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
  switch (IntrinsicID) {
  // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:
  case Intrinsic::ppc_altivec_vcmpeqfp_p:
  case Intrinsic::ppc_altivec_vcmpequb_p:
  case Intrinsic::ppc_altivec_vcmpequh_p:
  case Intrinsic::ppc_altivec_vcmpequw_p:
  case Intrinsic::ppc_altivec_vcmpequd_p:
    if (Subtarget.hasP8Altivec()) {
  case Intrinsic::ppc_altivec_vcmpneb_p:
  case Intrinsic::ppc_altivec_vcmpneh_p:
  case Intrinsic::ppc_altivec_vcmpnew_p:
  case Intrinsic::ppc_altivec_vcmpnezb_p:
  case Intrinsic::ppc_altivec_vcmpnezh_p:
  case Intrinsic::ppc_altivec_vcmpnezw_p:
    if (Subtarget.hasP9Altivec()) {
      switch (IntrinsicID) {
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb_p:
      case Intrinsic::ppc_altivec_vcmpneh_p:
      case Intrinsic::ppc_altivec_vcmpnew_p:
      case Intrinsic::ppc_altivec_vcmpnezb_p:
      case Intrinsic::ppc_altivec_vcmpnezh_p:
      case Intrinsic::ppc_altivec_vcmpnezw_p:
  case Intrinsic::ppc_altivec_vcmpgefp_p:
  case Intrinsic::ppc_altivec_vcmpgtfp_p:
  case Intrinsic::ppc_altivec_vcmpgtsb_p:
  case Intrinsic::ppc_altivec_vcmpgtsh_p:
  case Intrinsic::ppc_altivec_vcmpgtsw_p:
  case Intrinsic::ppc_altivec_vcmpgtsd_p:
    if (Subtarget.hasP8Altivec()) {
  case Intrinsic::ppc_altivec_vcmpgtub_p:
  case Intrinsic::ppc_altivec_vcmpgtuh_p:
  case Intrinsic::ppc_altivec_vcmpgtuw_p:
  case Intrinsic::ppc_altivec_vcmpgtud_p:
    if (Subtarget.hasP8Altivec()) {

  // VSX predicate comparisons use the same infrastructure
  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
  case Intrinsic::ppc_vsx_xvcmpgedp_p:
  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
  case Intrinsic::ppc_vsx_xvcmpgesp_p:
  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
    if (Subtarget.hasVSX()) {
      switch (IntrinsicID) {
      case Intrinsic::ppc_vsx_xvcmpeqdp_p:
      case Intrinsic::ppc_vsx_xvcmpgedp_p:
      case Intrinsic::ppc_vsx_xvcmpgtdp_p:
      case Intrinsic::ppc_vsx_xvcmpeqsp_p:
      case Intrinsic::ppc_vsx_xvcmpgesp_p:
      case Intrinsic::ppc_vsx_xvcmpgtsp_p:

  // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:
  case Intrinsic::ppc_altivec_vcmpeqfp:
  case Intrinsic::ppc_altivec_vcmpequb:
  case Intrinsic::ppc_altivec_vcmpequh:
  case Intrinsic::ppc_altivec_vcmpequw:
  case Intrinsic::ppc_altivec_vcmpequd:
    if (Subtarget.hasP8Altivec())
  case Intrinsic::ppc_altivec_vcmpneb:
  case Intrinsic::ppc_altivec_vcmpneh:
  case Intrinsic::ppc_altivec_vcmpnew:
  case Intrinsic::ppc_altivec_vcmpnezb:
  case Intrinsic::ppc_altivec_vcmpnezh:
  case Intrinsic::ppc_altivec_vcmpnezw:
    if (Subtarget.hasP9Altivec())
      switch (IntrinsicID) {
        llvm_unreachable("Unknown comparison intrinsic.");
      case Intrinsic::ppc_altivec_vcmpneb:
      case Intrinsic::ppc_altivec_vcmpneh:
      case Intrinsic::ppc_altivec_vcmpnew:
      case Intrinsic::ppc_altivec_vcmpnezb:
      case Intrinsic::ppc_altivec_vcmpnezh:
      case Intrinsic::ppc_altivec_vcmpnezw:
  case Intrinsic::ppc_altivec_vcmpgefp:
  case Intrinsic::ppc_altivec_vcmpgtfp:
  case Intrinsic::ppc_altivec_vcmpgtsb:
  case Intrinsic::ppc_altivec_vcmpgtsh:
  case Intrinsic::ppc_altivec_vcmpgtsw:
  case Intrinsic::ppc_altivec_vcmpgtsd:
    if (Subtarget.hasP8Altivec())
  case Intrinsic::ppc_altivec_vcmpgtub:
  case Intrinsic::ppc_altivec_vcmpgtuh:
  case Intrinsic::ppc_altivec_vcmpgtuw:
  case Intrinsic::ppc_altivec_vcmpgtud:
    if (Subtarget.hasP8Altivec())
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  unsigned IntrinsicID =
      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  if (IntrinsicID == Intrinsic::thread_pointer) {
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
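
// Descriptive note (added): LowerINTRINSIC_VOID handles chained PPC
// intrinsics; at present only llvm.ppc.cfence is custom lowered, by
// selecting a CFENCE8 machine node on 64-bit subtargets. Other intrinsics
// are left for the default handling.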
SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
                                               SelectionDAG &DAG) const {
  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
  // the beginning of the argument list.
  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
  SDLoc DL(Op);
  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
  case Intrinsic::ppc_cfence: {
    assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
    assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
    return SDValue(DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
                                      DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                                  Op.getOperand(ArgStart + 1)),
                                      Op.getOperand(0)),
                   0);
  }
  default:
    break;
  }
  return SDValue();
}
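
// Descriptive note (added): LowerREM custom lowers scalar SREM/UREM. If the
// DAG already contains a matching SDIV/UDIV of the same operands, it gives
// up (returns an empty SDValue) so the remainder is expanded in terms of
// that division; otherwise the REM node is kept and selected directly.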
SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const {
  // Check for a DIV with the same operands as this REM.
  for (auto UI : Op.getOperand(1)->uses()) {
    if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) ||
        (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV))
      if (UI->getOperand(0) == Op.getOperand(0) &&
          UI->getOperand(1) == Op.getOperand(1))
        return SDValue();
  }
  return Op;
}
// Lower scalar BSWAP64 to xxbrd.
SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
                   Op.getOperand(0));
  Op = DAG.getNode(PPCISD::XXREVERSE, dl, MVT::v2i64, Op);
  int VectorIndex = 0;
  if (Subtarget.isLittleEndian())
    VectorIndex = 1;
  Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
                   DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
  return Op;
}
// ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
// compared to a value that is atomically loaded (atomic loads zero-extend).
SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
         "Expecting an atomic compare-and-swap here.");
  SDLoc dl(Op);
  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
  EVT MemVT = AtomicNode->getMemoryVT();
  if (MemVT.getSizeInBits() >= 32)
    return Op;

  SDValue CmpOp = Op.getOperand(2);
  // If this is already correctly zero-extended, leave it alone.
  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
    return Op;

  // Clear the high bits of the compare operand.
  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
  SDValue NewCmpOp =
      DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
                  DAG.getConstant(MaskVal, dl, MVT::i32));

  // Replace the existing compare operand with the properly zero-extended one.
  SmallVector<SDValue, 4> Ops;
  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
    Ops.push_back(AtomicNode->getOperand(i));
  Ops[2] = NewCmpOp;
  MachineMemOperand *MMO = AtomicNode->getMemOperand();
  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
  unsigned NodeTy =
      (MemVT == MVT::i8) ? PPCISD::ATOMIC_CMP_SWAP_8 : PPCISD::ATOMIC_CMP_SWAP_16;
  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
}
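
// Descriptive note (added): LowerSCALAR_TO_VECTOR spills the scalar operand
// to a 16-byte-aligned stack slot and reloads the slot with the requested
// vector type, letting the memory system perform the widening.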
SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc dl(Op);
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
                               MachinePointerInfo());
  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
}
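
// Descriptive note (added): LowerINSERT_VECTOR_ELT handles v8i16/v16i8
// inserts with a constant index by moving the scalar into a vector register
// (MTVSRZ) and emitting a byte-indexed VECINSERT; variable indices and other
// types are not custom lowered here.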
SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
         "Should only be called for ISD::INSERT_VECTOR_ELT");

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  // We have legal lowering for constant indices but not for variable ones.
  if (!C)
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
    SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
    unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
    unsigned InsertAtElement = C->getZExtValue();
    unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
    if (Subtarget.isLittleEndian()) {
      InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
    }
    return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return SDValue();
}
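
// Descriptive note (added): LowerEXTRACT_VECTOR_ELT is the QPX v4i1 path.
// The boolean vector is converted to v4f64, mapped from {-1,1} to {0,1} with
// an FMA, rounded to integers, spilled through a stack slot with qvstfiw,
// and the requested 32-bit element is reloaded (and truncated to i1 when CR
// bits are in use).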
SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDNode *N = Op.getNode();

  assert(N->getOperand(0).getValueType() == MVT::v4i1 &&
         "Unknown extract_vector_elt type");

  SDValue Value = N->getOperand(0);

  // The first part of this is like the store lowering except that we don't
  // need to track the chain.

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
                      Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue StoreChain = DAG.getEntryNode();
  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Extract the value requested.
  unsigned Offset = 4*cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
  SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
  Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

  SDValue IntVal =
      DAG.getLoad(MVT::i32, dl, StoreChain, Idx, PtrInfo.getWithOffset(Offset));

  if (!Subtarget.useCRBits())
    return IntVal;

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, IntVal);
}
/// Lowering for QPX v4i1 loads
SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
  SDValue LoadChain = LN->getChain();
  SDValue BasePtr = LN->getBasePtr();

  if (Op.getValueType() == MVT::v4f64 ||
      Op.getValueType() == MVT::v4f32) {
    EVT MemVT = LN->getMemoryVT();
    unsigned Alignment = LN->getAlignment();

    // If this load is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Op.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Vals[4], LoadChains[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Load;
      if (ScalarVT != ScalarMemVT)
        Load = DAG.getExtLoad(LN->getExtensionType(), dl, ScalarVT, LoadChain,
                              BasePtr,
                              LN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              LN->getMemOperand()->getFlags(), LN->getAAInfo());
      else
        Load = DAG.getLoad(ScalarVT, dl, LoadChain, BasePtr,
                           LN->getPointerInfo().getWithOffset(Idx * Stride),
                           MinAlign(Alignment, Idx * Stride),
                           LN->getMemOperand()->getFlags(), LN->getAAInfo());

      if (Idx == 0 && LN->isIndexed()) {
        assert(LN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector load");
        Load = DAG.getIndexedLoad(Load, dl, BasePtr, LN->getOffset(),
                                  LN->getAddressingMode());
      }

      Vals[Idx] = Load;
      LoadChains[Idx] = Load.getValue(1);

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
    SDValue Value = DAG.getBuildVector(Op.getValueType(), dl, Vals);

    if (LN->isIndexed()) {
      SDValue RetOps[] = { Value, Vals[0].getValue(1), TF };
      return DAG.getMergeValues(RetOps, dl);
    }

    SDValue RetOps[] = { Value, TF };
    return DAG.getMergeValues(RetOps, dl);
  }

  assert(Op.getValueType() == MVT::v4i1 && "Unknown load to lower");
  assert(LN->isUnindexed() && "Indexed v4i1 loads are not supported");

  // To lower v4i1 from a byte array, we load the byte elements of the
  // vector and then reuse the BUILD_VECTOR logic.

  SDValue VectElmts[4], VectElmtChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    VectElmts[i] = DAG.getExtLoad(
        ISD::EXTLOAD, dl, MVT::i32, LoadChain, Idx,
        LN->getPointerInfo().getWithOffset(i), MVT::i8,
        /* Alignment = */ 1, LN->getMemOperand()->getFlags(), LN->getAAInfo());
    VectElmtChains[i] = VectElmts[i].getValue(1);
  }

  LoadChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, VectElmtChains);
  SDValue Value = DAG.getBuildVector(MVT::v4i1, dl, VectElmts);

  SDValue RVals[] = { Value, LoadChain };
  return DAG.getMergeValues(RVals, dl);
}
/// Lowering for QPX v4i1 stores
SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDLoc dl(Op);
  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
  SDValue StoreChain = SN->getChain();
  SDValue BasePtr = SN->getBasePtr();
  SDValue Value = SN->getValue();

  if (Value.getValueType() == MVT::v4f64 ||
      Value.getValueType() == MVT::v4f32) {
    EVT MemVT = SN->getMemoryVT();
    unsigned Alignment = SN->getAlignment();

    // If this store is properly aligned, then it is legal.
    if (Alignment >= MemVT.getStoreSize())
      return Op;

    EVT ScalarVT = Value.getValueType().getScalarType(),
        ScalarMemVT = MemVT.getScalarType();
    unsigned Stride = ScalarMemVT.getStoreSize();

    SDValue Stores[4];
    for (unsigned Idx = 0; Idx < 4; ++Idx) {
      SDValue Ex = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Value,
          DAG.getConstant(Idx, dl, getVectorIdxTy(DAG.getDataLayout())));
      SDValue Store;
      if (ScalarVT != ScalarMemVT)
        Store =
            DAG.getTruncStore(StoreChain, dl, Ex, BasePtr,
                              SN->getPointerInfo().getWithOffset(Idx * Stride),
                              ScalarMemVT, MinAlign(Alignment, Idx * Stride),
                              SN->getMemOperand()->getFlags(), SN->getAAInfo());
      else
        Store = DAG.getStore(StoreChain, dl, Ex, BasePtr,
                             SN->getPointerInfo().getWithOffset(Idx * Stride),
                             MinAlign(Alignment, Idx * Stride),
                             SN->getMemOperand()->getFlags(), SN->getAAInfo());

      if (Idx == 0 && SN->isIndexed()) {
        assert(SN->getAddressingMode() == ISD::PRE_INC &&
               "Unknown addressing mode on vector store");
        Store = DAG.getIndexedStore(Store, dl, BasePtr, SN->getOffset(),
                                    SN->getAddressingMode());
      }

      BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                            DAG.getConstant(Stride, dl,
                                            BasePtr.getValueType()));
      Stores[Idx] = Store;
    }

    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

    if (SN->isIndexed()) {
      SDValue RetOps[] = { TF, Stores[0].getValue(1) };
      return DAG.getMergeValues(RetOps, dl);
    }

    return TF;
  }

  assert(SN->isUnindexed() && "Indexed v4i1 stores are not supported");
  assert(Value.getValueType() == MVT::v4i1 && "Unknown store to lower");

  // The values are now known to be -1 (false) or 1 (true). To convert this
  // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
  // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
  Value = DAG.getNode(PPCISD::QBFLT, dl, MVT::v4f64, Value);

  // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
  // understand how to form the extending load.
  SDValue FPHalfs = DAG.getConstantFP(0.5, dl, MVT::v4f64);

  Value = DAG.getNode(ISD::FMA, dl, MVT::v4f64, Value, FPHalfs, FPHalfs);

  // Now convert to an integer and store.
  Value = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f64,
                      DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
                      Value);

  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = MFI.CreateStackObject(16, 16, false);
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDValue Ops[] = {StoreChain,
                   DAG.getConstant(Intrinsic::ppc_qpx_qvstfiw, dl, MVT::i32),
                   Value, FIdx};
  SDVTList VTs = DAG.getVTList(/*chain*/ MVT::Other);

  StoreChain = DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID,
                                       dl, VTs, Ops, MVT::v4i32, PtrInfo);

  // Move data into the byte array.
  SDValue Loads[4], LoadChains[4];
  for (unsigned i = 0; i < 4; ++i) {
    unsigned Offset = 4*i;
    SDValue Idx = DAG.getConstant(Offset, dl, FIdx.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, FIdx.getValueType(), FIdx, Idx);

    Loads[i] = DAG.getLoad(MVT::i32, dl, StoreChain, Idx,
                           PtrInfo.getWithOffset(Offset));
    LoadChains[i] = Loads[i].getValue(1);
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);

  SDValue Stores[4];
  for (unsigned i = 0; i < 4; ++i) {
    SDValue Idx = DAG.getConstant(i, dl, BasePtr.getValueType());
    Idx = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, Idx);

    Stores[i] = DAG.getTruncStore(
        StoreChain, dl, Loads[i], Idx, SN->getPointerInfo().getWithOffset(i),
        MVT::i8, /* Alignment = */ 1, SN->getMemOperand()->getFlags(),
        SN->getAAInfo());
  }

  StoreChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Stores);

  return StoreChain;
}
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG, dl);
    SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt.

    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                      LHS, RHS, DAG, dl, MVT::v4i32);

    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG, dl);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                         LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                        LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
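
// Descriptive note (added): LowerABS lowers vector abs as smax(X, 0 - X);
// because a generic SMAX node is not used here yet, the vmaxs* Altivec
// intrinsic matching the element width stands in for it.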
SDValue PPCTargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {

  assert(Op.getOpcode() == ISD::ABS && "Should only be called for ISD::ABS");

  EVT VT = Op.getValueType();
  assert(VT.isVector() &&
         "Only set vector abs as custom, scalar abs shouldn't reach here!");
  assert((VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 ||
          VT == MVT::v16i8) &&
         "Unexpected vector element type!");
  assert((VT != MVT::v2i64 || Subtarget.hasP8Altivec()) &&
         "Current subtarget doesn't support smax v2i64!");

  // For vector abs, it can be lowered to:
  //   abs x
  // ==>
  //   y = -x
  //   smax(x, y)

  SDLoc dl(Op);
  SDValue X = Op.getOperand(0);
  SDValue Zero = DAG.getConstant(0, dl, VT);
  SDValue Y = DAG.getNode(ISD::SUB, dl, VT, Zero, X);

  // SMAX patch https://reviews.llvm.org/D47332
  // hasn't landed yet, so use intrinsic first here.
  // TODO: Should use SMAX directly once SMAX patch landed
  Intrinsic::ID BifID = Intrinsic::ppc_altivec_vmaxsw;
  if (VT == MVT::v2i64)
    BifID = Intrinsic::ppc_altivec_vmaxsd;
  else if (VT == MVT::v8i16)
    BifID = Intrinsic::ppc_altivec_vmaxsh;
  else if (VT == MVT::v16i8)
    BifID = Intrinsic::ppc_altivec_vmaxsb;

  return BuildIntrinsicOp(BifID, X, Y, DAG, dl, VT);
}
// Custom lowering for fpext v2f32 to v2f64
SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::FP_EXTEND &&
         "Should only be called for ISD::FP_EXTEND");

  // We only want to custom lower an extend from v2f32 to v2f64.
  if (Op.getValueType() != MVT::v2f64 ||
      Op.getOperand(0).getValueType() != MVT::v2f32)
    return SDValue();

  SDLoc dl(Op);
  SDValue Op0 = Op.getOperand(0);

  switch (Op0.getOpcode()) {
  default:
    return SDValue();
  case ISD::EXTRACT_SUBVECTOR: {
    assert(Op0.getNumOperands() == 2 &&
           isa<ConstantSDNode>(Op0->getOperand(1)) &&
           "Node should have 2 operands with second one being a constant!");

    if (Op0.getOperand(0).getValueType() != MVT::v4f32)
      return SDValue();

    // Custom lower is only done for high or low doubleword.
    int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
    if (Idx % 2 != 0)
      return SDValue();

    // Since input is v4f32, at this point Idx is either 0 or 2.
    // Shift to get the doubleword position we want.
    int DWord = Idx >> 1;

    // High and low word positions are different on little endian.
    if (Subtarget.isLittleEndian())
      DWord ^= 0x1;

    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
                       Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
  }
  case ISD::FADD:
  case ISD::FMUL: {
    SDValue NewLoad[2];
    for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
      // Ensure both input are loads.
      SDValue LdOp = Op0.getOperand(i);
      if (LdOp.getOpcode() != ISD::LOAD)
        return SDValue();
      // Generate new load node.
      LoadSDNode *LD = cast<LoadSDNode>(LdOp);
      SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
      NewLoad[i] = DAG.getMemIntrinsicNode(
          PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
          LD->getMemoryVT(), LD->getMemOperand());
    }
    SDValue NewOp =
        DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
                    NewLoad[1], Op0.getNode()->getFlags());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(Op0);
    SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
    SDValue NewLd = DAG.getMemIntrinsicNode(
        PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
        LD->getMemoryVT(), LD->getMemOperand());
    return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
                       DAG.getConstant(0, dl, MVT::i32));
  }
  }
  llvm_unreachable("ERROR: Should return for all cases within switch.");
}
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:       return LowerBlockAddress(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::INIT_TRAMPOLINE:    return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE:  return LowerADJUST_TRAMPOLINE(Op, DAG);

  // Variable argument lowering.
  case ISD::VASTART:            return LowerVASTART(Op, DAG);
  case ISD::VAARG:              return LowerVAARG(Op, DAG);
  case ISD::VACOPY:             return LowerVACOPY(Op, DAG);

  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::GET_DYNAMIC_AREA_OFFSET:
    return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);

  // Exception handling lowering.
  case ISD::EH_DWARF_CFA:       return LowerEH_DWARF_CFA(Op, DAG);
  case ISD::EH_SJLJ_SETJMP:     return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP:    return lowerEH_SJLJ_LONGJMP(Op, DAG);

  case ISD::LOAD:               return LowerLOAD(Op, DAG);
  case ISD::STORE:              return LowerSTORE(Op, DAG);
  case ISD::TRUNCATE:           return LowerTRUNCATE(Op, DAG);
  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_UINT:
  case ISD::FP_TO_SINT:         return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:         return LowerINT_TO_FP(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:  return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);
  case ISD::ABS:                return LowerABS(Op, DAG);
  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);

  // For counter-based loop handling.
  case ISD::INTRINSIC_W_CHAIN:  return SDValue();

  case ISD::BITCAST:            return LowerBITCAST(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);

  case ISD::INTRINSIC_VOID:
    return LowerINTRINSIC_VOID(Op, DAG);
  case ISD::SREM:
  case ISD::UREM:
    return LowerREM(Op, DAG);
  case ISD::BSWAP:
    return LowerBSWAP(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:
    return LowerATOMIC_CMP_SWAP(Op, DAG);
  }
}
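
// Descriptive note (added): ReplaceNodeResults performs custom type
// legalization: READCYCLECOUNTER becomes a READ_TIME_BASE pair, the
// loop_decrement intrinsic result is truncated back to i1, and 32-bit SVR4
// VAARG, FP_TO_INT and vector TRUNCATE nodes get their widened replacements.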
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue> &Results,
                                           SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::READCYCLECOUNTER: {
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));

    Results.push_back(RTB);
    Results.push_back(RTB.getValue(1));
    Results.push_back(RTB.getValue(2));
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::loop_decrement)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");
    EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                 N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // LowerFP_TO_INT() can only handle f32 and f64.
    if (N->getOperand(0).getValueType() == MVT::ppcf128)
      return;
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  case ISD::TRUNCATE: {
    EVT TrgVT = N->getValueType(0);
    EVT OpVT = N->getOperand(0).getValueType();
    if (TrgVT.isVector() &&
        isOperationCustom(N->getOpcode(), TrgVT) &&
        OpVT.getSizeInBits() <= 128 &&
        isPowerOf2_32(OpVT.getVectorElementType().getSizeInBits()))
      Results.push_back(LowerTRUNCATEVector(SDValue(N, 0), DAG));
    return;
  }
  case ISD::BITCAST:
    // Don't handle bitcast here.
    return;
  }
}
//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

static Instruction *callIntrinsic(IRBuilder<> &Builder, Intrinsic::ID Id) {
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Function *Func = Intrinsic::getDeclaration(M, Id);
  return Builder.CreateCall(Func, {});
}
// The mappings for emitLeading/TrailingFence are taken from
// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
Instruction *PPCTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                 Instruction *Inst,
                                                 AtomicOrdering Ord) const {
  if (Ord == AtomicOrdering::SequentiallyConsistent)
    return callIntrinsic(Builder, Intrinsic::ppc_sync);
  if (isReleaseOrStronger(Ord))
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  return nullptr;
}

Instruction *PPCTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                  Instruction *Inst,
                                                  AtomicOrdering Ord) const {
  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
    // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
    // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
    // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
    if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
      return Builder.CreateCall(
          Intrinsic::getDeclaration(
              Builder.GetInsertBlock()->getParent()->getParent(),
              Intrinsic::ppc_cfence, {Inst->getType()}),
          {Inst});
    // FIXME: Can use isync for rmw operation.
    return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
  }
  return nullptr;
}
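
// Descriptive note (added): EmitAtomicBinary expands a pseudo atomic
// read-modify-write into an explicit l[bhwd]arx / st[bhwd]cx. retry loop;
// when CmpOpcode is nonzero an extra comparison block implements the
// min/max family of operations.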
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *BB,
                                    unsigned AtomicSize,
                                    unsigned BinOpcode,
                                    unsigned CmpOpcode,
                                    unsigned CmpPred) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  auto LoadMnemonic = PPC::LDARX;
  auto StoreMnemonic = PPC::STDCX;
  switch (AtomicSize) {
  default:
    llvm_unreachable("Unexpected size of atomic entity");
  case 1:
    LoadMnemonic = PPC::LBARX;
    StoreMnemonic = PPC::STBCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 2:
    LoadMnemonic = PPC::LHARX;
    StoreMnemonic = PPC::STHCX;
    assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
    break;
  case 4:
    LoadMnemonic = PPC::LWARX;
    StoreMnemonic = PPC::STWCX;
    break;
  case 8:
    LoadMnemonic = PPC::LDARX;
    StoreMnemonic = PPC::STDCX;
    break;
  }

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  Register TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
                                                   : &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //
  //  For min/max operations instead:
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   cmpl?[wd] incr, dest
  //   bcc exitMBB
  //  loop2MBB:
  //   st[wd]cx. dest, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  if (CmpOpcode) {
    // Signed comparisons of byte or halfword values must be sign-extended.
    if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
      Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
      BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
              ExtReg).addReg(dest);
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(ExtReg);
    } else
      BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(incr).addReg(dest);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(StoreMnemonic))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
MachineBasicBlock *PPCTargetLowering::EmitPartwordAtomicBinary(
    MachineInstr &MI, MachineBasicBlock *BB,
    bool is8bit, // operation
    unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
  // If we support part-word atomic mnemonics, just use them
  if (Subtarget.hasPartwordAtomics())
    return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
                            CmpPred);

  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = Subtarget.isPPC64();
  bool isLittleEndian = Subtarget.isLittleEndian();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = ++BB->getIterator();

  Register dest = MI.getOperand(0).getReg();
  Register ptrA = MI.getOperand(1).getReg();
  Register ptrB = MI.getOperand(2).getReg();
  Register incr = MI.getOperand(3).getReg();
  DebugLoc dl = MI.getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB =
      CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  if (CmpOpcode)
    F->insert(It, loop2MBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  std::next(MachineBasicBlock::iterator(MI)), BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
      is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

  Register PtrReg = RegInfo.createVirtualRegister(RC);
  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
  Register ShiftReg =
      isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
  Register Ptr1Reg;
  Register TmpReg =
      (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA)
        .addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }

  // We need to use a 32-bit subregister to avoid a register class mismatch in
  // 64-bit mode.
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
      .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
      .addImm(3)
      .addImm(27)
      .addImm(is8bit ? 28 : 27);
  if (!isLittleEndian)
    BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
        .addReg(Shift1Reg)
        .addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg)
        .addImm(0)
        .addImm(0)
        .addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg)
        .addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg)
      .addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
        .addReg(Incr2Reg)
        .addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg)
      .addReg(MaskReg);
  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
  if (CmpOpcode) {
    // For unsigned comparisons, we can directly compare the shifted values.
    // For signed comparisons we shift and sign extend.
    Register SReg = RegInfo.createVirtualRegister(GPRC);
    BuildMI(BB, dl, TII->get(PPC::AND), SReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    unsigned ValueReg = SReg;
    unsigned CmpReg = Incr2Reg;
    if (CmpOpcode == PPC::CMPW) {
      ValueReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
          .addReg(SReg)
          .addReg(ShiftReg);
      Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
      BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
          .addReg(ValueReg);
      ValueReg = ValueSReg;
      CmpReg = incr;
    }
    BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
        .addReg(CmpReg)
        .addReg(ValueReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(CmpPred)
        .addReg(PPC::CR0)
        .addMBB(exitMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(exitMBB);
    BB = loop2MBB;
  }
  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
      .addReg(Tmp4Reg)
      .addReg(ZeroReg)
      .addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE)
      .addReg(PPC::CR0)
      .addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
      .addReg(TmpDestReg)
      .addReg(ShiftReg);
  return BB;
}
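
// Descriptive note (added): emitEHSjLjSetJmp expands the EH_SjLj_SetJmp
// pseudo. The setup block saves the TOC and base pointer to the buffer, the
// main block stores the return address from LR, and a PHI in the sink block
// yields 0 on the direct path and 1 when re-entered via longjmp.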
llvm::MachineBasicBlock *
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  ...
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  ...
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register LabelReg = MRI.createVirtualRegister(PtrRC);
  Register BufReg = MI.getOperand(1).getReg();

  if (Subtarget.is64BitELFABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
              .addReg(PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Naked functions never have a base pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned BaseReg;
  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
    BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  else
    BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;

  MIB = BuildMI(*thisMBB, MI, DL,
                TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
            .addReg(BaseReg)
            .addImm(BPOffset)
            .addReg(BufReg)
            .cloneMemRefs(MI);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
            .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());

  // mainMBB:
  BuildMI(mainMBB, DL,
          TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  if (Subtarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
              .addReg(LabelReg)
              .addImm(LabelOffset)
              .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
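
// Descriptive note (added): emitEHSjLjLongJmp expands the EH_SjLj_LongJmp
// pseudo. It reloads the frame pointer, jump address, stack pointer, base
// pointer and (on 64-bit SVR4) the TOC pointer from the buffer, then
// branches to the saved address through the CTR register.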
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI.getDebugLoc();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  Register Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
  unsigned BP =
      (PVT == MVT::i64)
          ? PPC::X30
          : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
                                                              : PPC::R30);

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
  const int64_t BPOffset    = 4 * PVT.getStoreSize();

  Register BufReg = MI.getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload BP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
            .addImm(BPOffset)
            .addReg(BufReg);
  }
  MIB.cloneMemRefs(MI);

  // Reload TOC
  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
    setUsesTOCBasePtr(*MBB->getParent());
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
              .addImm(TOCOffset)
              .addReg(BufReg)
              .cloneMemRefs(MI);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI.eraseFromParent();
  return MBB;
}
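
// Descriptive note (added): EmitInstrWithCustomInserter expands pseudos that
// need explicit control flow: stackmaps/patchpoints, the SjLj setjmp and
// longjmp pseudos, and the SELECT/SELECT_CC family, which is turned into a
// diamond of basic blocks joined by a PHI.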
10799 MachineBasicBlock
*
10800 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr
&MI
,
10801 MachineBasicBlock
*BB
) const {
10802 if (MI
.getOpcode() == TargetOpcode::STACKMAP
||
10803 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10804 if (Subtarget
.is64BitELFABI() &&
10805 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10806 // Call lowering should have added an r2 operand to indicate a dependence
10807 // on the TOC base pointer value. It can't however, because there is no
10808 // way to mark the dependence as implicit there, and so the stackmap code
10809 // will confuse it with a regular operand. Instead, add the dependence
10811 MI
.addOperand(MachineOperand::CreateReg(PPC::X2
, false, true));
10814 return emitPatchPoint(MI
, BB
);
10817 if (MI
.getOpcode() == PPC::EH_SjLj_SetJmp32
||
10818 MI
.getOpcode() == PPC::EH_SjLj_SetJmp64
) {
10819 return emitEHSjLjSetJmp(MI
, BB
);
10820 } else if (MI
.getOpcode() == PPC::EH_SjLj_LongJmp32
||
10821 MI
.getOpcode() == PPC::EH_SjLj_LongJmp64
) {
10822 return emitEHSjLjLongJmp(MI
, BB
);
10825 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = ++BB->getIterator();

  MachineFunction *F = BB->getParent();

  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
      MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
      MI.getOpcode() == PPC::SELECT_I8) {
    SmallVector<MachineOperand, 2> Cond;
    if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
        MI.getOpcode() == PPC::SELECT_CC_I8)
      Cond.push_back(MI.getOperand(4));
    else
      Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
    Cond.push_back(MI.getOperand(1));

    DebugLoc dl = MI.getDebugLoc();
    TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
                      MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
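    // For the integer select pseudos handled here, the target's
    // TargetInstrInfo::insertSelect hook can emit a conditional-move style
    // sequence (e.g. isel on subtargets that provide it) in place, so no new
    // basic blocks are needed; every other SELECT/SELECT_CC flavour below is
    // expanded into an explicit branch diamond instead.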
  } else if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
             MI.getOpcode() == PPC::SELECT_CC_I8 ||
             MI.getOpcode() == PPC::SELECT_CC_F4 ||
             MI.getOpcode() == PPC::SELECT_CC_F8 ||
             MI.getOpcode() == PPC::SELECT_CC_F16 ||
             MI.getOpcode() == PPC::SELECT_CC_QFRC ||
             MI.getOpcode() == PPC::SELECT_CC_QSRC ||
             MI.getOpcode() == PPC::SELECT_CC_QBRC ||
             MI.getOpcode() == PPC::SELECT_CC_VRRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
             MI.getOpcode() == PPC::SELECT_CC_VSRC ||
             MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
             MI.getOpcode() == PPC::SELECT_CC_SPE ||
             MI.getOpcode() == PPC::SELECT_I4 ||
             MI.getOpcode() == PPC::SELECT_I8 ||
             MI.getOpcode() == PPC::SELECT_F4 ||
             MI.getOpcode() == PPC::SELECT_F8 ||
             MI.getOpcode() == PPC::SELECT_F16 ||
             MI.getOpcode() == PPC::SELECT_QFRC ||
             MI.getOpcode() == PPC::SELECT_QSRC ||
             MI.getOpcode() == PPC::SELECT_QBRC ||
             MI.getOpcode() == PPC::SELECT_SPE ||
             MI.getOpcode() == PPC::SELECT_SPE4 ||
             MI.getOpcode() == PPC::SELECT_VRRC ||
             MI.getOpcode() == PPC::SELECT_VSFRC ||
             MI.getOpcode() == PPC::SELECT_VSSRC ||
             MI.getOpcode() == PPC::SELECT_VSRC) {
    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //   thisMBB:
    //   ...
    //   cmpTY ccX, r1, r2
    //   bCC sinkMBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
        MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
        MI.getOpcode() == PPC::SELECT_F16 ||
        MI.getOpcode() == PPC::SELECT_SPE4 ||
        MI.getOpcode() == PPC::SELECT_SPE ||
        MI.getOpcode() == PPC::SELECT_QFRC ||
        MI.getOpcode() == PPC::SELECT_QSRC ||
        MI.getOpcode() == PPC::SELECT_QBRC ||
        MI.getOpcode() == PPC::SELECT_VRRC ||
        MI.getOpcode() == PPC::SELECT_VSFRC ||
        MI.getOpcode() == PPC::SELECT_VSSRC ||
        MI.getOpcode() == PPC::SELECT_VSRC) {
      BuildMI(BB, dl, TII->get(PPC::BC))
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    } else {
      unsigned SelectPred = MI.getOperand(4).getImm();
      BuildMI(BB, dl, TII->get(PPC::BCC))
          .addImm(SelectPred)
          .addReg(MI.getOperand(1).getReg())
          .addMBB(sinkMBB);
    }

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
        .addReg(MI.getOperand(3).getReg())
        .addMBB(copy0MBB)
        .addReg(MI.getOperand(2).getReg())
        .addMBB(thisMBB);
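    // At this point the expansion is a standard select diamond, roughly:
    //   thisMBB:  bc/bcc <cond>, sinkMBB      ; true value in operand 2
    //   copy0MBB: (empty; false value is operand 3) -- falls through
    //   sinkMBB:  %dest = PHI [falseVal, copy0MBB], [trueVal, thisMBB]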
  } else if (MI.getOpcode() == PPC::ReadTB) {
    // To read the 64-bit time-base register on a 32-bit target, we read the
    // two halves. Should the counter have wrapped while it was being read, we
    // need to try again.
    //
    // readLoop:
    // mfspr Rx,TBU # load from TBU
    // mfspr Ry,TB  # load from TB
    // mfspr Rz,TBU # load from TBU
    // cmpw crX,Rx,Rz # check if 'old'='new'
    // bne readLoop # branch if they're not equal

    MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    DebugLoc dl = MI.getDebugLoc();
    F->insert(It, readMBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    BB->addSuccessor(readMBB);
    BB = readMBB;

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
    Register LoReg = MI.getOperand(0).getReg();
    Register HiReg = MI.getOperand(1).getReg();

    BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
    BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);

    Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
        .addReg(HiReg)
        .addReg(ReadAgainReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(CmpReg)
        .addMBB(readMBB);

    BB->addSuccessor(readMBB);
    BB->addSuccessor(sinkMBB);
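    // SPR 269 is the upper half of the time base (TBU) and SPR 268 the lower
    // half (TB/TBL), so the loop above is equivalent to:
    //   do {
    //     hi  = mfspr(TBU);
    //     lo  = mfspr(TB);
    //     hi2 = mfspr(TBU);
    //   } while (hi != hi2);   // retry if the low word carried into TBU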
  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);

  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);

  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, 4, 0);
  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, 8, 0);
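  // All of the pseudos above funnel into two helpers: EmitPartwordAtomicBinary
  // takes an "is 8-bit" flag for i8/i16 operations, which must be emulated
  // with a masked word-sized load-reserve/store-conditional loop, while
  // EmitAtomicBinary takes the access size (4 or 8) directly. A binary opcode
  // of 0 combined with a compare opcode and predicate (the min/max variants)
  // selects the compare-and-branch form of the loop; 0 alone (the swap
  // variants) simply stores the new value.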
  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
           (Subtarget.hasPartwordAtomics() &&
            MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
    bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    auto LoadMnemonic = PPC::LDARX;
    auto StoreMnemonic = PPC::STDCX;
    switch (MI.getOpcode()) {
    default:
      llvm_unreachable("Compare and swap of unknown size");
    case PPC::ATOMIC_CMP_SWAP_I8:
      LoadMnemonic = PPC::LBARX;
      StoreMnemonic = PPC::STBCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I16:
      LoadMnemonic = PPC::LHARX;
      StoreMnemonic = PPC::STHCX;
      assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
      break;
    case PPC::ATOMIC_CMP_SWAP_I32:
      LoadMnemonic = PPC::LWARX;
      StoreMnemonic = PPC::STWCX;
      break;
    case PPC::ATOMIC_CMP_SWAP_I64:
      LoadMnemonic = PPC::LDARX;
      StoreMnemonic = PPC::STDCX;
      break;
    }
    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[bhwd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[bhwd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   st[bhwd]cx. dest, ptr
    // exitMBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
        .addReg(dest)
        .addReg(oldval);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(newval)
        .addReg(ptrA)
        .addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(StoreMnemonic))
        .addReg(dest)
        .addReg(ptrA)
        .addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    BB = exitMBB;
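    // For reference, a plain IR `cmpxchg i32* %p, i32 %old, i32 %new monotonic
    // monotonic` typically reaches this inserter as ATOMIC_CMP_SWAP_I32; the
    // expansion is the classic reservation loop: l[w]arx the current value,
    // compare it against %old, conditionally st[w]cx. %new (retrying if the
    // reservation was lost), and on the failure path st[w]cx. the value just
    // loaded back to the same address, which drops the reservation.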
  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them. Other registers
    // can be 32-bit.
    bool is64bit = Subtarget.isPPC64();
    bool isLittleEndian = Subtarget.isLittleEndian();
    bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    Register dest = MI.getOperand(0).getReg();
    Register ptrA = MI.getOperand(1).getReg();
    Register ptrB = MI.getOperand(2).getReg();
    Register oldval = MI.getOperand(3).getReg();
    Register newval = MI.getOperand(4).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    std::next(MachineBasicBlock::iterator(MI)), BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
        is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
    const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;

    Register PtrReg = RegInfo.createVirtualRegister(RC);
    Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
    Register ShiftReg =
        isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
    Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
    Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
    Register MaskReg = RegInfo.createVirtualRegister(GPRC);
    Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
    Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
    Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
    Register Ptr1Reg;
    Register TmpReg = RegInfo.createVirtualRegister(GPRC);
    Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

    //  thisMBB:
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word. Hence all this nasty bookkeeping code:
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitMBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
          .addReg(ptrA)
          .addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }

    // We need to use a 32-bit subregister to avoid a register-class mismatch
    // in 64-bit mode.
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
        .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
        .addImm(3)
        .addImm(27)
        .addImm(is8bit ? 28 : 27);
    if (!isLittleEndian)
      BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
          .addReg(Shift1Reg)
          .addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
          .addReg(Ptr1Reg)
          .addImm(0)
          .addImm(0)
          .addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval)
        .addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
          .addReg(Mask3Reg)
          .addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg)
        .addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg)
        .addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg)
        .addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
        .addReg(TmpDestReg)
        .addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
        .addReg(Tmp2Reg)
        .addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(Tmp4Reg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE)
        .addReg(PPC::CR0)
        .addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX))
        .addReg(TmpDestReg)
        .addReg(ZeroReg)
        .addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
        .addReg(TmpDestReg)
        .addReg(ShiftReg);
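    // Worked example of the bookkeeping above, assuming big-endian and an i8
    // at address 0x1003: ptr1 & 3 == 3, so shift1 = 3*8 = 24 and
    // xori(24, 24) = 0, i.e. the byte already sits in the least significant
    // byte of the aligned word at 0x1000. A byte at 0x1000 instead gets
    // shift = 24, so mask/newval/oldval are all moved up to the most
    // significant byte before the word-sized lwarx/stwcx. loop runs.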
  } else if (MI.getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero. We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    Register Dest = MI.getOperand(0).getReg();
    Register Src1 = MI.getOperand(1).getReg();
    Register Src2 = MI.getOperand(2).getReg();
    DebugLoc dl = MI.getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
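    // FPSCR bits 30:31 hold the RN (rounding control) field; setting bit 31
    // and clearing bit 30 selects RN = 0b01, i.e. round toward zero, which is
    // what the mtfsb1 31 / mtfsb0 30 pair above does before the add, and what
    // the final mtfsf restores from the saved copy.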
  } else if (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT ||
             MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
             MI.getOpcode() == PPC::ANDIo_1_GT_BIT8) {
    unsigned Opcode = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8 ||
                       MI.getOpcode() == PPC::ANDIo_1_GT_BIT8)
                          ? PPC::ANDIo8
                          : PPC::ANDIo;
    bool isEQ = (MI.getOpcode() == PPC::ANDIo_1_EQ_BIT ||
                 MI.getOpcode() == PPC::ANDIo_1_EQ_BIT8);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register Dest = RegInfo.createVirtualRegister(
        Opcode == PPC::ANDIo ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);

    DebugLoc dl = MI.getDebugLoc();
    BuildMI(*BB, MI, dl, TII->get(Opcode), Dest)
        .addReg(MI.getOperand(1).getReg())
        .addImm(1);
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(isEQ ? PPC::CR0EQ : PPC::CR0GT);
  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
    BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(CRReg);
  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
    DebugLoc Dl = MI.getDebugLoc();
    unsigned Imm = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
    BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
            MI.getOperand(0).getReg())
        .addReg(PPC::CR0EQ);
  } else if (MI.getOpcode() == PPC::SETRNDi) {
    DebugLoc dl = MI.getDebugLoc();
    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
    // the following settings:
    //   00 Round to nearest
    //   01 Round to zero
    //   10 Round to +inf
    //   11 Round to -inf
    //
    // When the operand is immediate, use the two least significant bits of
    // the immediate to set the bits 62:63 of FPSCR.
    unsigned Mode = MI.getOperand(1).getImm();
    BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(31);
    BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
        .addImm(30);
  } else if (MI.getOpcode() == PPC::SETRND) {
    DebugLoc dl = MI.getDebugLoc();

    // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
    // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
    // If the target doesn't have DirectMove, we should use stack to do the
    // conversion, because the target doesn't have the instructions like mtvsrd
    // or mfvsrd to do this conversion directly.
    auto copyRegFromG8RCOrF8RC = [&](unsigned DestReg, unsigned SrcReg) {
      if (Subtarget.hasDirectMove()) {
        BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
            .addReg(SrcReg);
      } else {
        // Use stack to do the register copy.
        unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
        MachineRegisterInfo &RegInfo = F->getRegInfo();
        const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
        if (RC == &PPC::F8RCRegClass) {
          // Copy register from F8RCRegClass to G8RCRegclass.
          assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
                 "Unsupported RegClass.");

          StoreOp = PPC::STFD;
          LoadOp = PPC::LD;
        } else {
          // Copy register from G8RCRegClass to F8RCRegclass.
          assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
                 (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
                 "Unsupported RegClass.");
        }

        MachineFrameInfo &MFI = F->getFrameInfo();
        int FrameIdx = MFI.CreateStackObject(8, 8, false);

        MachineMemOperand *MMOStore = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlignment(FrameIdx));

        // Store the SrcReg into the stack.
        BuildMI(*BB, MI, dl, TII->get(StoreOp))
            .addReg(SrcReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOStore);

        MachineMemOperand *MMOLoad = F->getMachineMemOperand(
            MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
            MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
            MFI.getObjectAlignment(FrameIdx));

        // Load from the stack where SrcReg is stored, and save to DestReg,
        // so we have done the RegClass conversion from RegClass::SrcReg to
        // RegClass::DestReg.
        BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
            .addImm(0)
            .addFrameIndex(FrameIdx)
            .addMemOperand(MMOLoad);
      }
    };

    Register OldFPSCRReg = MI.getOperand(0).getReg();

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);

    // When the operand is a gprc register, use its two least significant bits
    // and the mtfsf instruction to set the bits 62:63 of FPSCR:
    //
    //   copy OldFPSCRTmpReg, OldFPSCRReg
    //   (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
    //   rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
    //   copy NewFPSCRReg, NewFPSCRTmpReg
    //   mtfsf 255, NewFPSCRReg
    MachineOperand SrcOp = MI.getOperand(1);
    MachineRegisterInfo &RegInfo = F->getRegInfo();
    Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);

    Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);

    // The first operand of INSERT_SUBREG should be a register which has
    // subregisters; we only care about its RegClass, so we should use an
    // IMPLICIT_DEF register.
    BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
    BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
        .addReg(ImDefReg)
        .add(SrcOp)
        .addImm(1);

    Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
    BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
        .addReg(OldFPSCRTmpReg)
        .addReg(ExtSrcReg)
        .addImm(0)
        .addImm(62);

    Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
    copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);

    // The mask 255 means put the 32:63 bits of NewFPSCRReg into the 32:63
    // bits of FPSCR.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
        .addImm(255)
        .addReg(NewFPSCRReg)
        .addImm(0)
        .addImm(0);
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
  // For the estimates, convergence is quadratic, so we essentially double the
  // number of digits correct after every iteration. For both FRE and FRSQRTE,
  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
  if (VT.getScalarType() == MVT::f64)
    RefinementSteps++;
  return RefinementSteps;
}
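
// A quick sanity check of the step counts above: starting from 2^-5 relative
// error, three Newton-Raphson steps give roughly 2^-10, 2^-20 and then 2^-40
// (enough for f32's 24-bit significand), and the extra step for f64 reaches
// about 2^-80 > 2^-53. With hasRecipPrec(), the 2^-14 starting point needs
// only one step (2^-28) for f32 and two (2^-56) for f64.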

SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                           int Enabled, int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);

    // The Newton-Raphson computation with a single constant does not provide
    // enough accuracy on some CPUs.
    UseOneConstNR = !Subtarget.needsTwoConstNR();
    return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}

SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Operand.getValueType();
  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
      (VT == MVT::f64 && Subtarget.hasFRE()) ||
      (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
      (VT == MVT::v2f64 && Subtarget.hasVSX()) ||
      (VT == MVT::v4f32 && Subtarget.hasQPX()) ||
      (VT == MVT::v4f64 && Subtarget.hasQPX())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
    return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
  }
  return SDValue();
}
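
// FRE/FRSQRTE only seed the iteration; the target-independent estimate code
// then applies Newton-Raphson refinement with the step counts requested
// above, roughly
//   x_{n+1} = x_n * (2 - d * x_n)              for 1/d, and
//   x_{n+1} = x_n * (1.5 - 0.5 * d * x_n^2)    for 1/sqrt(d).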

unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
  // Note: This functionality is used only when unsafe-fp-math is enabled, and
  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
  // enabled for division), this functionality is redundant with the default
  // combiner logic (once the division -> reciprocal/multiply transformation
  // has taken place). As a result, this matters more for older cores than for
  // newer ones.

  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
  // reciprocal if there are two or more FDIVs (for embedded cores with only
  // one FP pipeline) or three or more FDIVs (for generic OOO cores).
  switch (Subtarget.getDarwinDirective()) {
  default:
    return 3;
  case PPC::DIR_E500:
  case PPC::DIR_E500mc:
  case PPC::DIR_E5500:
    return 2;
  }
}

// isConsecutiveLSLoc needs to work even if all adds have not yet been
// collapsed, and so we need to look through chains of them.
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base,
                                      int64_t &Offset, SelectionDAG &DAG) {
  if (DAG.isBaseWithConstantOffset(Loc)) {
    Base = Loc.getOperand(0);
    Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();

    // The base might itself be a base plus an offset, and if so, accumulate
    // that as well.
    getBaseWithConstantOffset(Loc.getOperand(0), Base, Offset, DAG);
  }
}

static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
                               unsigned Bytes, int Dist,
                               SelectionDAG &DAG) {
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDValue BaseLoc = Base->getBasePtr();
  if (Loc.getOpcode() == ISD::FrameIndex) {
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
    int FI  = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS  = MFI.getObjectSize(FI);
    int BFS = MFI.getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
  }

  SDValue Base1 = Loc, Base2 = BaseLoc;
  int64_t Offset1 = 0, Offset2 = 0;
  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
    return true;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  const GlobalValue *GV1 = nullptr;
  const GlobalValue *GV2 = nullptr;
  Offset1 = 0;
  Offset2 = 0;
  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);

  return false;
}
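
// Example: with Bytes = 16 and Dist = 1, a load of <4 x float> from
// (add %base, 16) is "consecutive" with respect to a base load from %base,
// because Base1 == Base2 and Offset1 (16) == Offset2 (0) + 1 * 16; Dist = -1
// would instead match the slot immediately below the base access.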

// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
// not enforce equality of the chain operands.
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
                            unsigned Bytes, int Dist,
                            SelectionDAG &DAG) {
  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
    EVT VT = LS->getMemoryVT();
    SDValue Loc = LS->getBasePtr();
    return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvlfd:
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvlfiwa:
    case Intrinsic::ppc_qpx_qvlfiwz:
    case Intrinsic::ppc_altivec_lvx:
    case Intrinsic::ppc_altivec_lvxl:
    case Intrinsic::ppc_vsx_lxvw4x:
    case Intrinsic::ppc_vsx_lxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
    case Intrinsic::ppc_vsx_lxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
  }

  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
    EVT VT;
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    default: return false;
    case Intrinsic::ppc_qpx_qvstfd:
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    case Intrinsic::ppc_qpx_qvstfiw:
    case Intrinsic::ppc_qpx_qvstfiwa:
    case Intrinsic::ppc_altivec_stvx:
    case Intrinsic::ppc_altivec_stvxl:
    case Intrinsic::ppc_vsx_stxvw4x:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_vsx_stxvw4x_be:
      VT = MVT::v4i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x_be:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    }

    return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
  }

  return false;
}

// Return true if there is a nearby consecutive load to the one provided
// (regardless of alignment). We search up and down the chain, looking through
// token factors and other loads (but nothing else). As a result, a true result
// indicates that it is safe to create a new consecutive load adjacent to the
// load provided.
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
  SDValue Chain = LD->getChain();
  EVT VT = LD->getMemoryVT();

  SmallSet<SDNode *, 16> LoadRoots;
  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
  SmallSet<SDNode *, 16> Visited;

  // First, search up the chain, branching to follow all token-factor operands.
  // If we find a consecutive load, then we're done, otherwise, record all
  // nodes just above the top-level loads and token factors.
  while (!Queue.empty()) {
    SDNode *ChainNext = Queue.pop_back_val();
    if (!Visited.insert(ChainNext).second)
      continue;

    if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
      if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
        return true;

      if (!Visited.count(ChainLD->getChain().getNode()))
        Queue.push_back(ChainLD->getChain().getNode());
    } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
      for (const SDUse &O : ChainNext->ops())
        if (!Visited.count(O.getNode()))
          Queue.push_back(O.getNode());
    } else
      LoadRoots.insert(ChainNext);
  }

  // Second, search down the chain, starting from the top-level nodes recorded
  // in the first phase. These top-level nodes are the nodes just above all
  // loads and token factors. Starting with their uses, recursively look through
  // all loads (just the chain uses) and token factors to find a consecutive
  // load.
  Visited.clear();
  Queue.clear();

  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
       IE = LoadRoots.end(); I != IE; ++I) {
    Queue.push_back(*I);

    while (!Queue.empty()) {
      SDNode *LoadRoot = Queue.pop_back_val();
      if (!Visited.insert(LoadRoot).second)
        continue;

      if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
        if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
          return true;

      for (SDNode::use_iterator UI = LoadRoot->use_begin(),
           UE = LoadRoot->use_end(); UI != UE; ++UI)
        if (((isa<MemSDNode>(*UI) &&
              cast<MemSDNode>(*UI)->getChain().getNode() == LoadRoot) ||
             UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI))
          Queue.push_back(*UI);
    }
  }

  return false;
}

/// This function is called when we have proved that a SETCC node can be
/// replaced by subtraction (and other supporting instructions) so that the
/// result of the comparison is kept in a GPR instead of a CR. This function is
/// purely for codegen purposes and has some flags to guide the codegen process.
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
                                     bool Swap, SDLoc &DL, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  // Zero extend the operands to the largest legal integer. Originally, they
  // must be of a strictly smaller size.
  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
                         DAG.getConstant(Size, DL, MVT::i32));
  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
                         DAG.getConstant(Size, DL, MVT::i32));

  // Swap if needed. Depends on the condition code.
  if (Swap)
    std::swap(Op0, Op1);

  // Subtract extended integers.
  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);

  // Move the sign bit to the least significant position and zero out the rest.
  // Now the least significant bit carries the result of original comparison.
  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
                             DAG.getConstant(Size - 1, DL, MVT::i32));
  auto Final = Shifted;

  // Complement the result if needed. Based on the condition code.
  if (Complement)
    Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
                        DAG.getConstant(1, DL, MVT::i64));

  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
}
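
// For example, for (setcc ult %a, %b) on 32-bit inputs: both operands are
// zero-extended, so %a - %b is negative exactly when %a < %b as unsigned
// values; the difference's sign bit (bit Size-1) is shifted down to bit 0,
// and Complement flips it for inverted predicates such as uge.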

SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  // Size of integers being compared has a critical role in the following
  // analysis, so we prefer to do this when all types are legal.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // If all users of SETCC extend its value to a legal integer type
  // then we replace SETCC with a subtraction.
  for (SDNode::use_iterator UI = N->use_begin(),
       UE = N->use_end(); UI != UE; ++UI) {
    if (UI->getOpcode() != ISD::ZERO_EXTEND)
      return SDValue();
  }

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  auto OpSize = N->getOperand(0).getValueSizeInBits();

  unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();

  if (OpSize < Size) {
    switch (CC) {
    default: break;
    case ISD::SETULT:
      return generateEquivalentSub(N, Size, false, false, DL, DAG);
    case ISD::SETULE:
      return generateEquivalentSub(N, Size, true, true, DL, DAG);
    case ISD::SETUGT:
      return generateEquivalentSub(N, Size, false, true, DL, DAG);
    case ISD::SETUGE:
      return generateEquivalentSub(N, Size, true, false, DL, DAG);
    }
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't affect the result.
    ISD::CondCode CC =
        cast<CondCodeSDNode>(N->getOperand(
          N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so clear it in all masks prior to comparing them.
      Op1Known.Zero.clearBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.clearBit(0); Op2Known.One.clearBit(0);

      if (Op1Known.Zero != Op2Known.Zero || Op1Known.One != Op2Known.One)
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR  &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
         N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
           BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
        DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
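
// As a concrete illustration of the combine above: for i1 values x and y that
// were zero-extended to i32 only to be AND'ed and truncated back,
//   trunc(and(zext(x), zext(y))) --> and(x, y)
// so the whole computation stays in CR bits instead of bouncing through GPRs;
// the same rewrite applies through chains of and/or/xor/select(_cc) nodes.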

SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  // If we're tracking CR bits, we need to be careful that we don't have:
  //   zext(binary-ops(trunc(x), trunc(y)))
  // or
  //   zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
  // such that we're unnecessarily moving things into CR bits that can more
  // efficiently stay in GPRs. Note that if we're not certain that the high
  // bits are set as required by the final extension, we still may need to do
  // some masking to get the proper behavior.

  // This same functionality is important on PPC64 when dealing with
  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
  // the return values of functions. Because it is so similar, it is handled
  // here as well.

  if (N->getValueType(0) != MVT::i32 &&
      N->getValueType(0) != MVT::i64)
    return SDValue();

  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
        (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
    return SDValue();

  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR  &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC)
    return SDValue();

  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by truncations.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.back();
    BinOps.pop_back();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR  ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not a truncation or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // The operands of a select that must be truncated when the select is
  // promoted because the operand is actually part of the to-be-promoted set.
  DenseMap<SDNode *, EVT> SelectTruncOp[2];

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (SDNode::use_iterator UI = Inputs[i].getNode()->use_begin(),
         UE = Inputs[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == Inputs[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (SDNode::use_iterator UI = PromOps[i].getNode()->use_begin(),
         UE = PromOps[i].getNode()->use_end();
         UI != UE; ++UI) {
      SDNode *User = *UI;
      if (User != N && !Visited.count(User))
        return SDValue();

      // If we're going to promote the non-output-value operand(s) or SELECT or
      // SELECT_CC, record them for truncation.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i])
          SelectTruncOp[0].insert(std::make_pair(User,
                                    User->getOperand(0).getValueType()));
        if (User->getOperand(1) == PromOps[i])
          SelectTruncOp[1].insert(std::make_pair(User,
                                    User->getOperand(1).getValueType()));
      }
    }
  }

  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
  bool ReallyNeedsExt = false;
  if (N->getOpcode() != ISD::ANY_EXTEND) {
    // If all of the inputs are not already sign/zero extended, then
    // we'll still need to do that at the end.
    for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
      if (isa<ConstantSDNode>(Inputs[i]))
        continue;

      unsigned OpBits =
        Inputs[i].getOperand(0).getValueSizeInBits();
      assert(PromBits < OpBits && "Truncation not to a smaller bit count?");

      if ((N->getOpcode() == ISD::ZERO_EXTEND &&
           !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
                                  APInt::getHighBitsSet(OpBits,
                                                        OpBits-PromBits))) ||
          (N->getOpcode() == ISD::SIGN_EXTEND &&
           DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
             (OpBits-(PromBits-1)))) {
        ReallyNeedsExt = true;
        break;
      }
    }
  }

  // Replace all inputs, either with the truncation operand, or a
  // truncation or extension to the final output type.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constant inputs need to be replaced with the to-be-promoted nodes that
    // use them because they might have users outside of the cluster of
    // to-be-promoted nodes.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    SDValue InSrc = Inputs[i].getOperand(0);
    if (Inputs[i].getValueType() == N->getValueType(0))
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
    else if (N->getOpcode() == ISD::SIGN_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else if (N->getOpcode() == ISD::ZERO_EXTEND)
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i],
        DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
  }

  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (promoted) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    // For SELECT and SELECT_CC nodes, we do a similar check for any
    // to-be-promoted comparison inputs.
    if (PromOp.getOpcode() == ISD::SELECT ||
        PromOp.getOpcode() == ISD::SELECT_CC) {
      if ((SelectTruncOp[0].count(PromOp.getNode()) &&
           PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
          (SelectTruncOp[1].count(PromOp.getNode()) &&
           PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
        PromOpHandles.emplace_front(PromOp);
        continue;
      }
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If this node has constant inputs, then they'll need to be promoted here.
    for (unsigned i = 0; i < 2; ++i) {
      if (!isa<ConstantSDNode>(Ops[C+i]))
        continue;
      if (Ops
[C
+i
].getValueType() == N
->getValueType(0))
12478 if (N
->getOpcode() == ISD::SIGN_EXTEND
)
12479 Ops
[C
+i
] = DAG
.getSExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12480 else if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12481 Ops
[C
+i
] = DAG
.getZExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12483 Ops
[C
+i
] = DAG
.getAnyExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12486 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12487 // truncate them again to the original value type.
12488 if (PromOp
.getOpcode() == ISD::SELECT
||
12489 PromOp
.getOpcode() == ISD::SELECT_CC
) {
12490 auto SI0
= SelectTruncOp
[0].find(PromOp
.getNode());
12491 if (SI0
!= SelectTruncOp
[0].end())
12492 Ops
[0] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI0
->second
, Ops
[0]);
12493 auto SI1
= SelectTruncOp
[1].find(PromOp
.getNode());
12494 if (SI1
!= SelectTruncOp
[1].end())
12495 Ops
[1] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI1
->second
, Ops
[1]);
12498 DAG
.ReplaceAllUsesOfValueWith(PromOp
,
12499 DAG
.getNode(PromOp
.getOpcode(), dl
, N
->getValueType(0), Ops
));
12502 // Now we're left with the initial extension itself.
12503 if (!ReallyNeedsExt
)
12504 return N
->getOperand(0);
12506 // To zero extend, just mask off everything except for the first bit (in the
12508 if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12509 return DAG
.getNode(ISD::AND
, dl
, N
->getValueType(0), N
->getOperand(0),
12510 DAG
.getConstant(APInt::getLowBitsSet(
12511 N
->getValueSizeInBits(0), PromBits
),
12512 dl
, N
->getValueType(0)));
12514 assert(N
->getOpcode() == ISD::SIGN_EXTEND
&&
12515 "Invalid extension type");
12516 EVT ShiftAmountTy
= getShiftAmountTy(N
->getValueType(0), DAG
.getDataLayout());
12518 DAG
.getConstant(N
->getValueSizeInBits(0) - PromBits
, dl
, ShiftAmountTy
);
12519 return DAG
.getNode(
12520 ISD::SRA
, dl
, N
->getValueType(0),
12521 DAG
.getNode(ISD::SHL
, dl
, N
->getValueType(0), N
->getOperand(0), ShiftCst
),
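
// Worked example for the SETCC combine that follows: for equality compares,
//   (setcc %x, (sub 0, %y), seteq)
// is rewritten to
//   (setcc (add %x, %y), 0, seteq),
// so the explicit negation disappears and the comparison is against zero.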
SDValue PPCTargetLowering::combineSetCC(SDNode *N,
                                        DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::SETCC &&
         "Should be called with a SETCC node");

  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
    if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
        LHS.hasOneUse())
      std::swap(LHS, RHS);

    // x == 0-y --> x+y == 0
    // x != 0-y --> x+y != 0
    if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
        RHS.hasOneUse()) {
      SDLoc DL(N);
      SelectionDAG &DAG = DCI.DAG;
      EVT VT = N->getValueType(0);
      EVT OpVT = LHS.getValueType();
      SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
      return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
    }
  }

  return DAGCombineTruncBoolExt(N, DCI);
}
// Is this an extending load from an f32 to an f64?
static bool isFPExtLoad(SDValue Op) {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
    return LD->getExtensionType() == ISD::EXTLOAD &&
           Op.getValueType() == MVT::f64;
  return false;
}
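
// Note on the combine below: when converting to 32-bit integers, an FP_ROUND
// back to f32 has to be inserted for each element. That is only done when
// every input is an extending f32->f64 load (checked via isFPExtLoad above),
// so the added round is exact and the narrowed loads can be combined later.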
/// Reduces the number of fp-to-int conversions when building a vector.
///
/// If this vector is built out of floating to integer conversions,
/// transform it to a vector built out of floating point values followed by a
/// single floating to integer conversion of the vector.
/// Namely  (build_vector (fptosi $A), (fptosi $B), ...)
/// becomes (fptosi (build_vector ($A, $B, ...)))
SDValue PPCTargetLowering::
combineElementTruncationToVectorTruncation(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  SDValue FirstInput = N->getOperand(0);
  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
         "The input operand must be an fp-to-int conversion.");

  // This combine happens after legalization so the fp_to_[su]i nodes are
  // already converted to PPCSISD nodes.
  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
  if (FirstConversion == PPCISD::FCTIDZ ||
      FirstConversion == PPCISD::FCTIDUZ ||
      FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ) {
    bool IsSplat = true;
    bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
      FirstConversion == PPCISD::FCTIWUZ;
    EVT SrcVT = FirstInput.getOperand(0).getValueType();
    SmallVector<SDValue, 4> Ops;
    EVT TargetVT = N->getValueType(0);
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue NextOp = N->getOperand(i);
      if (NextOp.getOpcode() != PPCISD::MFVSR)
        return SDValue();
      unsigned NextConversion = NextOp.getOperand(0).getOpcode();
      if (NextConversion != FirstConversion)
        return SDValue();
      // If we are converting to 32-bit integers, we need to add an FP_ROUND.
      // This is not valid if the input was originally double precision. It is
      // also not profitable to do unless this is an extending load in which
      // case doing this combine will allow us to combine consecutive loads.
      if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
        return SDValue();

      if (N->getOperand(i) != FirstInput)
        IsSplat = false;
    }

    // If this is a splat, we leave it as-is since there will be only a single
    // fp-to-int conversion followed by a splat of the integer. This is better
    // for 32-bit and smaller ints and neutral for 64-bit ints.
    if (IsSplat)
      return SDValue();

    // Now that we know we have the right type of node, get its operands
    for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
      SDValue In = N->getOperand(i).getOperand(0);

      // For 32-bit values, we need to add an FP_ROUND node (if we made it
      // here, we know that all inputs are extending loads so this is safe).
      if (Is32Bit) {
        if (In.isUndef())
          Ops.push_back(DAG.getUNDEF(SrcVT));
        else {
          SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
                                      MVT::f32, In.getOperand(0),
                                      DAG.getIntPtrConstant(1, dl));
          Ops.push_back(Trunc);
        }
      } else
        Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
    }

    unsigned Opcode;
    if (FirstConversion == PPCISD::FCTIDZ ||
        FirstConversion == PPCISD::FCTIWZ)
      Opcode = ISD::FP_TO_SINT;
    else
      Opcode = ISD::FP_TO_UINT;

    EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
    SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
    return DAG.getNode(Opcode, dl, TargetVT, BV);
  }
  return SDValue();
}
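
// Example for the load combine that follows: a v4i32 build_vector whose
// operands are four i32 loads from A, A+4, A+8 and A+12 becomes a single
// v4i32 load from A; if the loads instead run from A+12 down to A, the wide
// load is followed by a vector_shuffle that reverses the elements.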
/// Reduce the number of loads when building a vector.
///
/// Building a vector out of multiple loads can be converted to a load
/// of the vector type if the loads are consecutive. If the loads are
/// consecutive but in descending order, a shuffle is added at the end
/// to reorder the vector.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SDLoc dl(N);

  // Return early for non byte-sized types, as they can't be consecutive.
  if (!N->getValueType(0).getVectorElementType().isByteSized())
    return SDValue();

  bool InputsAreConsecutiveLoads = true;
  bool InputsAreReverseConsecutive = true;
  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
  SDValue FirstInput = N->getOperand(0);
  bool IsRoundOfExtLoad = false;

  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
      FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
    LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
    IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
  }
  // Not a build vector of (possibly fp_rounded) loads.
  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
      N->getNumOperands() == 1)
    return SDValue();

  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
      return SDValue();

    SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
      N->getOperand(i);
    if (NextInput.getOpcode() != ISD::LOAD)
      return SDValue();

    SDValue PreviousInput =
      IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
    LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
    LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);

    // If any inputs are fp_round(extload), they all must be.
    if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
      return SDValue();

    if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
      InputsAreConsecutiveLoads = false;
    if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
      InputsAreReverseConsecutive = false;
  }

  // Exit early if the loads are neither consecutive nor reverse consecutive.
  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
    return SDValue();

  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
         "The loads cannot be both consecutive and reverse consecutive.");

  SDValue FirstLoadOp =
    IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
  SDValue LastLoadOp =
    IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
                       N->getOperand(N->getNumOperands()-1);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
  if (InputsAreConsecutiveLoads) {
    assert(LD1 && "Input needs to be a LoadSDNode.");
    return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
                       LD1->getBasePtr(), LD1->getPointerInfo(),
                       LD1->getAlignment());
  }
  if (InputsAreReverseConsecutive) {
    assert(LDL && "Input needs to be a LoadSDNode.");
    SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
                               LDL->getBasePtr(), LDL->getPointerInfo(),
                               LDL->getAlignment());
    SmallVector<int, 16> Ops;
    for (int i = N->getNumOperands() - 1; i >= 0; i--)
      Ops.push_back(i);

    return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
                                DAG.getUNDEF(N->getValueType(0)), Ops);
  }
  return SDValue();
}
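
// addShuffleForVecExtend (below) rearranges the source vector so that the
// extracted elements sit at the lane positions the P9 vector sign-extend
// instructions read from, and then wraps the shuffle in a PPCISD::SExtVElems
// node so those instructions get selected.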
// This function adds the required vector_shuffle needed to get
// the elements of the vector extract in the correct position
// as specified by the CorrectElems encoding.
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG,
                                      SDValue Input, uint64_t Elems,
                                      uint64_t CorrectElems) {
  SDLoc dl(N);

  unsigned NumElems = Input.getValueType().getVectorNumElements();
  SmallVector<int, 16> ShuffleMask(NumElems, -1);

  // Knowing the element indices being extracted from the original
  // vector and the order in which they're being inserted, just put
  // them at element indices required for the instruction.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (DAG.getDataLayout().isLittleEndian())
      ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
    else
      ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
    CorrectElems = CorrectElems >> 8;
    Elems = Elems >> 8;
  }

  SDValue Shuffle =
      DAG.getVectorShuffle(Input.getValueType(), dl, Input,
                           DAG.getUNDEF(Input.getValueType()), ShuffleMask);

  EVT Ty = N->getValueType(0);
  SDValue BV = DAG.getNode(PPCISD::SExtVElems, dl, Ty, Shuffle);
  return BV;
}
// Look for build vector patterns where input operands come from sign
// extended vector_extract elements of specific indices. If the correct indices
// aren't used, add a vector shuffle to fix up the indices and create a new
// PPCISD::SExtVElems node which selects the vector sign extend instructions
// during instruction selection.
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG) {
  // This array encodes the indices that the vector sign extend instructions
  // extract from when extending from one type to another for both BE and LE.
  // The right nibble of each byte corresponds to the LE indices,
  // and the left nibble of each byte corresponds to the BE indices.
  // For example: 0x3074B8FC  byte->word
  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
  // For example: 0x000070F8  byte->double word
  // For LE: the allowed indices are: 0x0,0x8
  // For BE: the allowed indices are: 0x7,0xF
  uint64_t TargetElems[] = {
      0x3074B8FC, // b->w
      0x000070F8, // b->d
      0x10325476, // h->w
      0x00003074, // h->d
      0x00001032, // w->d
  };

  uint64_t Elems = 0;
  int Index;
  SDValue Input;

  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
    if (!Op)
      return false;
    if (Op.getOpcode() != ISD::SIGN_EXTEND &&
        Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
      return false;

    // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
    // of the right width.
    SDValue Extract = Op.getOperand(0);
    if (Extract.getOpcode() == ISD::ANY_EXTEND)
      Extract = Extract.getOperand(0);
    if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return false;

    ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
    if (!ExtOp)
      return false;

    Index = ExtOp->getZExtValue();
    if (Input && Input != Extract.getOperand(0))
      return false;

    if (!Input)
      Input = Extract.getOperand(0);

    Elems = Elems << 8;
    Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
    Elems |= Index;

    return true;
  };

  // If the build vector operands aren't sign extended vector extracts
  // of the same input vector, then return.
  for (unsigned i = 0; i < N->getNumOperands(); i++) {
    if (!isSExtOfVecExtract(N->getOperand(i))) {
      return SDValue();
    }
  }

  // If the vector extract indices are not correct, add the appropriate
  // vector_shuffle.
  int TgtElemArrayIdx;
  int InputSize = Input.getValueType().getScalarSizeInBits();
  int OutputSize = N->getValueType(0).getScalarSizeInBits();
  if (InputSize + OutputSize == 40)
    TgtElemArrayIdx = 0;
  else if (InputSize + OutputSize == 72)
    TgtElemArrayIdx = 1;
  else if (InputSize + OutputSize == 48)
    TgtElemArrayIdx = 2;
  else if (InputSize + OutputSize == 80)
    TgtElemArrayIdx = 3;
  else if (InputSize + OutputSize == 96)
    TgtElemArrayIdx = 4;
  else
    return SDValue();

  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
  CorrectElems = DAG.getDataLayout().isLittleEndian()
                     ? CorrectElems & 0x0F0F0F0F0F0F0F0F
                     : CorrectElems & 0xF0F0F0F0F0F0F0F0;
  if (Elems != CorrectElems) {
    return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
  }

  // Regular lowering will catch cases where a shuffle is not needed.
  return SDValue();
}
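
// DAGCombineBuildVector tries, in order: turning a build_vector of fp-to-int
// conversions into a single vector conversion, turning a build_vector of
// consecutive loads into one vector load, recognizing sign-extended vector
// extracts (when P9 Altivec is available), and finally matching a v2f64 built
// from two [su]int_to_fp'd extracts of the same v4i32 into
// PPCISD::[SU]INT_VEC_TO_FP.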
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
                                                 DAGCombinerInfo &DCI) const {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         "Should be called with a BUILD_VECTOR node");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  if (!Subtarget.hasVSX())
    return SDValue();

  // The target independent DAG combiner will leave a build_vector of
  // float-to-int conversions intact. We can generate MUCH better code for
  // a float-to-int conversion of a vector of floats.
  SDValue FirstInput = N->getOperand(0);
  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
    SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
    if (Reduced)
      return Reduced;
  }

  // If we're building a vector out of consecutive loads, just load that
  // vector type.
  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
  if (Reduced)
    return Reduced;

  // If we're building a vector out of extended elements from another vector
  // we have P9 vector integer extend instructions. The code assumes legal
  // input types (i.e. it can't handle things like v4i16) so do not run before
  // legalization.
  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
    Reduced = combineBVOfVecSExt(N, DAG);
    if (Reduced)
      return Reduced;
  }

  if (N->getValueType(0) != MVT::v2f64)
    return SDValue();

  // Looking for:
  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
      FirstInput.getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
      N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
    return SDValue();
  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
    return SDValue();

  SDValue Ext1 = FirstInput.getOperand(0);
  SDValue Ext2 = N->getOperand(1).getOperand(0);
  if (Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();

  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
  if (!Ext1Op || !Ext2Op)
    return SDValue();
  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
      Ext1.getOperand(0) != Ext2.getOperand(0))
    return SDValue();

  int FirstElem = Ext1Op->getZExtValue();
  int SecondElem = Ext2Op->getZExtValue();
  int SubvecIdx;
  if (FirstElem == 0 && SecondElem == 1)
    SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
  else if (FirstElem == 2 && SecondElem == 3)
    SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
  else
    return SDValue();

  SDValue SrcVec = Ext1.getOperand(0);
  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
    PPCISD::SINT_VEC_TO_FP : PPCISD::UINT_VEC_TO_FP;
  return DAG.getNode(NodeType, dl, MVT::v2f64,
                     SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
}
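
// Example for the int-to-fp combine below: (f64 (sint_to_fp (i64 (fp_to_sint
// f64:$A)))) becomes FCTIDZ followed by FCFID entirely in floating-point
// registers, so the value never has to round-trip through memory.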
SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::SINT_TO_FP ||
          N->getOpcode() == ISD::UINT_TO_FP) &&
         "Need an int -> FP conversion node here");

  if (useSoftFloat() || !Subtarget.has64BitSupport())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Op(N, 0);

  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
  // from the hardware.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDValue();
  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
      Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
    return SDValue();

  SDValue FirstOperand(Op.getOperand(0));
  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
    (FirstOperand.getValueType() == MVT::i8 ||
     FirstOperand.getValueType() == MVT::i16);
  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
    bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
    bool DstDouble = Op.getValueType() == MVT::f64;
    unsigned ConvOp = Signed ?
      (DstDouble ? PPCISD::FCFID  : PPCISD::FCFIDS) :
      (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
    SDValue WidthConst =
      DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
                            dl, false);
    LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
    SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
    SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
                                         DAG.getVTList(MVT::f64, MVT::Other),
                                         Ops, MVT::i8, LDN->getMemOperand());

    // For signed conversion, we need to sign-extend the value in the VSR
    if (Signed) {
      SDValue ExtOps[] = { Ld, WidthConst };
      SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
    } else
      return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
  }

  // For i32 intermediate values, unfortunately, the conversion functions
  // leave the upper 32 bits of the value undefined. Within the set of
  // scalar instructions, we have no method for zero- or sign-extending the
  // value. Thus, we cannot handle i32 intermediate values here.
  if (Op.getOperand(0).getValueType() == MVT::i32)
    return SDValue();

  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
         "UINT_TO_FP is supported only with FPCVT");

  // If we have FCFIDS, then use it when converting to single-precision.
  // Otherwise, convert to double-precision and then round.
  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                       ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
                                                            : PPCISD::FCFIDS)
                       : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
                                                            : PPCISD::FCFID);
  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
                  ? MVT::f32
                  : MVT::f64;

  // If we're converting from a float, to an int, and back to a float again,
  // then we don't need the store/load pair at all.
  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
       Subtarget.hasFPCVT()) ||
      (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
    SDValue Src = Op.getOperand(0).getOperand(0);
    if (Src.getValueType() == MVT::f32) {
      Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
      DCI.AddToWorklist(Src.getNode());
    } else if (Src.getValueType() != MVT::f64) {
      // Make sure that we don't pick up a ppc_fp128 source value.
      return SDValue();
    }

    unsigned FCTOp =
      Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
                                                        PPCISD::FCTIDUZ;

    SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
    SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);

    if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
      FP = DAG.getNode(ISD::FP_ROUND, dl,
                       MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
      DCI.AddToWorklist(FP.getNode());
    }

    return FP;
  }

  return SDValue();
}
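
// On little-endian subtargets that need swaps, the expansion below turns a
// vector load into PPCISD::LXVD2X followed by PPCISD::XXSWAPD (plus a bitcast
// if the type is not v2f64); sufficiently aligned loads of vectors with
// elements of at most 4 bytes are left alone.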
// expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
// builtins) into loads with swaps.
SDValue PPCTargetLowering::expandVSXLoadForLE(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX load");
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    Chain = LD->getChain();
    Base = LD->getBasePtr();
    MMO = LD->getMemOperand();
    // If the MMO suggests this isn't a load of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Similarly to the store case below, Intrin->getBasePtr() doesn't get
    // us what we want. Get operand 2 instead.
    Base = Intrin->getOperand(2);
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  MVT VecTy = N->getValueType(0).getSimpleVT();

  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
      && VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  SDValue LoadOps[] = { Chain, Base };
  SDValue Load = DAG.getMemIntrinsicNode(PPCISD::LXVD2X, dl,
                                         DAG.getVTList(MVT::v2f64, MVT::Other),
                                         LoadOps, MVT::v2f64, MMO);

  DCI.AddToWorklist(Load.getNode());
  Chain = Load.getValue(1);
  SDValue Swap = DAG.getNode(
      PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
  DCI.AddToWorklist(Swap.getNode());

  // Add a bitcast if the resulting load type doesn't match v2f64.
  if (VecTy != MVT::v2f64) {
    SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
    DCI.AddToWorklist(N.getNode());
    // Package {bitcast value, swap's chain} to match Load's shape.
    return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
                       N, Swap.getValue(1));
  }

  return Swap;
}
// expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
// builtins) into stores with swaps.
SDValue PPCTargetLowering::expandVSXStoreForLE(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Chain;
  SDValue Base;
  unsigned SrcOpnd;
  MachineMemOperand *MMO;

  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Unexpected opcode for little endian VSX store");
  case ISD::STORE: {
    StoreSDNode *ST = cast<StoreSDNode>(N);
    Chain = ST->getChain();
    Base = ST->getBasePtr();
    SrcOpnd = 1;
    MMO = ST->getMemOperand();
    // If the MMO suggests this isn't a store of a full vector, leave
    // things alone. For a built-in, we have to make the change for
    // correctness, so if there is a size problem that will be a bug.
    if (MMO->getSize() < 16)
      return SDValue();
    break;
  }
  case ISD::INTRINSIC_VOID: {
    MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
    Chain = Intrin->getChain();
    // Intrin->getBasePtr() oddly does not get what we want.
    Base = Intrin->getOperand(3);
    SrcOpnd = 2;
    MMO = Intrin->getMemOperand();
    break;
  }
  }

  SDValue Src = N->getOperand(SrcOpnd);
  MVT VecTy = Src.getValueType().getSimpleVT();

  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
  // aligned and the type is a vector with elements up to 4 bytes
  if (Subtarget.needsSwapsForVSXMemOps() && !(MMO->getAlignment()%16)
      && VecTy.getScalarSizeInBits() <= 32) {
    return SDValue();
  }

  // All stores are done as v2f64 and possible bit cast.
  if (VecTy != MVT::v2f64) {
    Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
    DCI.AddToWorklist(Src.getNode());
  }

  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
                             DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
  DCI.AddToWorklist(Swap.getNode());
  Chain = Swap.getValue(1);
  SDValue StoreOps[] = { Chain, Swap, Base };
  SDValue Store = DAG.getMemIntrinsicNode(PPCISD::STXVD2X, dl,
                                          DAG.getVTList(MVT::Other),
                                          StoreOps, VecTy, MMO);
  DCI.AddToWorklist(Store.getNode());
  return Store;
}
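
// The store combine below folds (store (fp_to_[su]int X)) into a
// PPCISD::FP_TO_[SU]INT_IN_VSR conversion feeding a PPCISD::ST_VSR_SCAL_INT
// memory node, so the converted integer is stored directly from a VSR.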
// Handle DAG combine for STORE (FP_TO_INT F).
SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
                                               DAGCombinerInfo &DCI) const {

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  unsigned Opcode = N->getOperand(1).getOpcode();

  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
         && "Not a FP_TO_INT Instruction!");

  SDValue Val = N->getOperand(1).getOperand(0);
  EVT Op1VT = N->getOperand(1).getValueType();
  EVT ResVT = Val.getValueType();

  // Floating point types smaller than 32 bits are not legal on Power.
  if (ResVT.getScalarSizeInBits() < 32)
    return SDValue();

  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
  bool ValidTypeForStoreFltAsInt =
        (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
         (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));

  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Altivec() ||
      cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
    return SDValue();

  // Extend f32 values to f64
  if (ResVT.getScalarSizeInBits() == 32) {
    Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
    DCI.AddToWorklist(Val.getNode());
  }

  // Set signed or unsigned conversion opcode.
  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
                          PPCISD::FP_TO_SINT_IN_VSR :
                          PPCISD::FP_TO_UINT_IN_VSR;

  Val = DAG.getNode(ConvOpcode,
                    dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
  DCI.AddToWorklist(Val.getNode());

  // Set number of bytes being converted.
  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
                    DAG.getIntPtrConstant(ByteSize, dl, false),
                    DAG.getValueType(Op1VT) };

  Val = DAG.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT, dl,
          DAG.getVTList(MVT::Other), Ops,
          cast<StoreSDNode>(N)->getMemoryVT(),
          cast<StoreSDNode>(N)->getMemOperand());

  DCI.AddToWorklist(Val.getNode());
  return Val;
}
SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
                                                LSBaseSDNode *LSBase,
                                                DAGCombinerInfo &DCI) const {
  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
         "Not a reverse memop pattern!");

  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
    auto Mask = SVN->getMask();
    int i = 0;
    auto I = Mask.rbegin();
    auto E = Mask.rend();

    for (; I != E; ++I) {
      if (*I != i)
        return false;
      i++;
    }
    return true;
  };

  SelectionDAG &DAG = DCI.DAG;
  EVT VT = SVN->getValueType(0);

  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
    return SDValue();

  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
  // See comment in PPCVSXSwapRemoval.cpp.
  // This transform conflicts with that optimization, so we don't do it here.
  if (!Subtarget.hasP9Vector())
    return SDValue();

  if (!IsElementReverse(SVN))
    return SDValue();

  if (LSBase->getOpcode() == ISD::LOAD) {
    SDLoc dl(SVN);
    SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  if (LSBase->getOpcode() == ISD::STORE) {
    SDLoc dl(LSBase);
    SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
                          LSBase->getBasePtr()};
    return DAG.getMemIntrinsicNode(
        PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
        LSBase->getMemoryVT(), LSBase->getMemOperand());
  }

  llvm_unreachable("Expected a load or store node here");
}
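
// PerformDAGCombine is the main dispatch point for target combines: it
// switches on the node's opcode and forwards to the helpers above (combineADD,
// combineSetCC, DAGCombineBuildVector, the VSX load/store expansions, and so
// on), handling a few simple cases such as shifts of constants inline.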
SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD:
    return combineADD(N, DCI);
  case ISD::SHL:
    return combineSHL(N, DCI);
  case ISD::SRA:
    return combineSRA(N, DCI);
  case ISD::SRL:
    return combineSRL(N, DCI);
  case ISD::MUL:
    return combineMUL(N, DCI);
  case PPCISD::SHL:
    if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRL:
    if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
      return N->getOperand(0);
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    return DAGCombineExtBoolTrunc(N, DCI);
  case ISD::TRUNCATE:
    return combineTRUNCATE(N, DCI);
  case ISD::SETCC:
    if (SDValue CSCC = combineSetCC(N, DCI))
      return CSCC;
    LLVM_FALLTHROUGH;
  case ISD::SELECT_CC:
    return DAGCombineTruncBoolExt(N, DCI);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
    return combineFPToIntToFP(N, DCI);
  case ISD::VECTOR_SHUFFLE:
    if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
      LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
      return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
    }
    break;
  case ISD::STORE: {

    EVT Op1VT = N->getOperand(1).getValueType();
    unsigned Opcode = N->getOperand(1).getOpcode();

    if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
      SDValue Val = combineStoreFPToInt(N, DCI);
      if (Val)
        return Val;
    }

    if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
      ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
      SDValue Val = combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
      if (Val)
        return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
         (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {

      // STBRX can only handle simple types and it makes no sense to store
      // less than two bytes in byte-reversed order.
      EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
      if (mVT.isExtended() || mVT.getSizeInBits() < 16)
        break;

      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      // If the type of the BSWAP operand is wider than the stored memory
      // width, it needs to be shifted to the right side before STBRX.
      if (Op1VT.bitsGT(mVT)) {
        int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
        BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
                              DAG.getConstant(Shift, dl, MVT::i32));
        // Need to truncate if this is a bswap of i64 stored as i32/i16.
        if (Op1VT == MVT::i64)
          BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
      }

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }

    // STORE Constant:i32<0>  ->  STORE<trunc to i32> Constant:i64<0>
    // So it can increase the chance of CSE constant construction.
    if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
        isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
      // Need to sign-extend to 64 bits to handle negative values.
      EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
      uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
                                    MemVT.getSizeInBits());
      SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);

      // DAG.getTruncStore() can't be used here because it doesn't accept
      // the general (base + offset) addressing mode.
      // So we use UpdateNodeOperands and setTruncatingStore instead.
      DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
                             N->getOperand(3));
      cast<StoreSDNode>(N)->setTruncatingStore(true);
      return SDValue(N, 0);
    }

    // For little endian, VSX stores require generating xxswapd/stxvd2x.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
    if (Op1VT.isSimple()) {
      MVT StoreVT = Op1VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
           StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
        return expandVSXStoreForLE(N, DCI);
    }
    break;
  }
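  // The LOAD case below handles three independent rewrites: splitting an i64
  // load whose two halves are only bitcast to f32 into two f32 loads,
  // expanding little-endian VSX vector loads via expandVSXLoadForLE, and
  // expanding unaligned Altivec/QPX vector loads with a permute-based
  // sequence.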
  case ISD::LOAD: {
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT VT = LD->getValueType(0);

    // For little endian, VSX loads require generating lxvd2x/xxswapd.
    // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
    if (VT.isSimple()) {
      MVT LoadVT = VT.getSimpleVT();
      if (Subtarget.needsSwapsForVSXMemOps() &&
          (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
           LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
        return expandVSXLoadForLE(N, DCI);
    }

    // We sometimes end up with a 64-bit integer load, from which we extract
    // two single-precision floating-point numbers. This happens with
    // std::complex<float>, and other similar structures, because of the way we
    // canonicalize structure copies. However, if we lack direct moves,
    // then the final bitcasts from the extracted integer values to the
    // floating-point numbers turn into store/load pairs. Even with direct
    // moves, just loading the two floating-point numbers is likely better.
    auto ReplaceTwoFloatLoad = [&]() {
      if (VT != MVT::i64)
        return false;

      if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
          LD->isVolatile())
        return false;

      //  We're looking for a sequence like this:
      //  t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
      //      t16: i64 = srl t13, Constant:i32<32>
      //    t17: i32 = truncate t16
      //  t18: f32 = bitcast t17
      //    t19: i32 = truncate t13
      //  t20: f32 = bitcast t19

      if (!LD->hasNUsesOfValue(2, 0))
        return false;

      auto UI = LD->use_begin();
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *Trunc = *UI++;
      while (UI.getUse().getResNo() != 0) ++UI;
      SDNode *RightShift = *UI;
      if (Trunc->getOpcode() != ISD::TRUNCATE)
        std::swap(Trunc, RightShift);

      if (Trunc->getOpcode() != ISD::TRUNCATE ||
          Trunc->getValueType(0) != MVT::i32 ||
          !Trunc->hasOneUse())
        return false;
      if (RightShift->getOpcode() != ISD::SRL ||
          !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
          RightShift->getConstantOperandVal(1) != 32 ||
          !RightShift->hasOneUse())
        return false;

      SDNode *Trunc2 = *RightShift->use_begin();
      if (Trunc2->getOpcode() != ISD::TRUNCATE ||
          Trunc2->getValueType(0) != MVT::i32 ||
          !Trunc2->hasOneUse())
        return false;

      SDNode *Bitcast = *Trunc->use_begin();
      SDNode *Bitcast2 = *Trunc2->use_begin();

      if (Bitcast->getOpcode() != ISD::BITCAST ||
          Bitcast->getValueType(0) != MVT::f32)
        return false;
      if (Bitcast2->getOpcode() != ISD::BITCAST ||
          Bitcast2->getValueType(0) != MVT::f32)
        return false;

      if (Subtarget.isLittleEndian())
        std::swap(Bitcast, Bitcast2);

      // Bitcast has the second float (in memory-layout order) and Bitcast2
      // has the first one.

      SDValue BasePtr = LD->getBasePtr();
      if (LD->isIndexed()) {
        assert(LD->getAddressingMode() == ISD::PRE_INC &&
               "Non-pre-inc AM on PPC?");
        BasePtr =
          DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
                      LD->getOffset());
      }

      auto MMOFlags =
          LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
      SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
                                      LD->getPointerInfo(), LD->getAlignment(),
                                      MMOFlags, LD->getAAInfo());
      SDValue AddPtr =
        DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
                    BasePtr, DAG.getIntPtrConstant(4, dl));
      SDValue FloatLoad2 = DAG.getLoad(
          MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
          LD->getPointerInfo().getWithOffset(4),
          MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());

      if (LD->isIndexed()) {
        // Note that DAGCombine should re-form any pre-increment load(s) from
        // what is produced here if that makes sense.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
      }

      DCI.CombineTo(Bitcast2, FloatLoad);
      DCI.CombineTo(Bitcast, FloatLoad2);

      DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
                                    SDValue(FloatLoad2.getNode(), 1));
      return true;
    };

    if (ReplaceTwoFloatLoad())
      return SDValue(N, 0);

    EVT MemVT = LD->getMemoryVT();
    Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlignment = DAG.getDataLayout().getABITypeAlignment(Ty);
    Type *STy = MemVT.getScalarType().getTypeForEVT(*DAG.getContext());
    unsigned ScalarABIAlignment = DAG.getDataLayout().getABITypeAlignment(STy);
    if (LD->isUnindexed() && VT.isVector() &&
        ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
          // P8 and later hardware should just use LOAD.
          !Subtarget.hasP8Vector() && (VT == MVT::v16i8 || VT == MVT::v8i16 ||
                                       VT == MVT::v4i32 || VT == MVT::v4f32)) ||
         (Subtarget.hasQPX() && (VT == MVT::v4f64 || VT == MVT::v4f32) &&
          LD->getAlignment() >= ScalarABIAlignment)) &&
        LD->getAlignment() < ABIAlignment) {
      // This is a type-legal unaligned Altivec or QPX load.
      SDValue Chain = LD->getChain();
      SDValue Ptr = LD->getBasePtr();
      bool isLittleEndian = Subtarget.isLittleEndian();

      // This implements the loading of unaligned vectors as described in
      // the venerable Apple Velocity Engine overview. Specifically:
      // https://developer.apple.com/hardwaredrivers/ve/alignment.html
      // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
      //
      // The general idea is to expand a sequence of one or more unaligned
      // loads into an alignment-based permutation-control instruction (lvsl
      // or lvsr), a series of regular vector loads (which always truncate
      // their input address to an aligned address), and a series of
      // permutations. The results of these permutations are the requested
      // loaded values. The trick is that the last "extra" load is not taken
      // from the address you might suspect (sizeof(vector) bytes after the
      // last requested load), but rather sizeof(vector) - 1 bytes after the
      // last requested vector. The point of this is to avoid a page fault if
      // the base address happened to be aligned. This works because if the
      // base address is aligned, then adding less than a full vector length
      // will cause the last vector in the sequence to be (re)loaded.
      // Otherwise, the next vector will be fetched as you might suspect was
      // necessary.

      // We might be able to reuse the permutation generation from
      // a different base address offset from this one by an aligned amount.
      // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
      // optimization later.
      Intrinsic::ID Intr, IntrLD, IntrPerm;
      MVT PermCntlTy, PermTy, LDTy;
      if (Subtarget.hasAltivec()) {
        Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr :
                                Intrinsic::ppc_altivec_lvsl;
        IntrLD = Intrinsic::ppc_altivec_lvx;
        IntrPerm = Intrinsic::ppc_altivec_vperm;
        PermCntlTy = MVT::v16i8;
        PermTy = MVT::v4i32;
        LDTy = MVT::v4i32;
      } else {
        Intr =   MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlpcld :
                                       Intrinsic::ppc_qpx_qvlpcls;
        IntrLD = MemVT == MVT::v4f64 ? Intrinsic::ppc_qpx_qvlfd :
                                       Intrinsic::ppc_qpx_qvlfs;
        IntrPerm = Intrinsic::ppc_qpx_qvfperm;
        PermCntlTy = MVT::v4f64;
        PermTy = MVT::v4f64;
        LDTy = MemVT.getSimpleVT();
      }

      SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);

      // Create the new MMO for the new base load. It is like the original MMO,
      // but represents an area in memory almost twice the vector size centered
      // on the original address. If the address is unaligned, we might start
      // reading up to (sizeof(vector)-1) bytes below the address of the
      // original unaligned load.
      MachineFunction &MF = DAG.getMachineFunction();
      MachineMemOperand *BaseMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                -(long)MemVT.getStoreSize()+1,
                                2*MemVT.getStoreSize()-1);

      // Create the new base load.
      SDValue LDXIntID =
          DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
      SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue BaseLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                BaseLoadOps, LDTy, BaseMMO);

      // Note that the value of IncOffset (which is provided to the next
      // load's pointer info offset value, and thus used to calculate the
      // alignment), and the value of IncValue (which is actually used to
      // increment the pointer value) are different! This is because we
      // require the next load to appear to be aligned, even though it
      // is actually offset from the base pointer by a lesser amount.
      int IncOffset = VT.getSizeInBits() / 8;
      int IncValue = IncOffset;

      // Walk (both up and down) the chain looking for another load at the real
      // (aligned) offset (the alignment of the other load does not matter in
      // this case). If found, then do not use the offset reduction trick, as
      // that will prevent the loads from being later combined (as they would
      // otherwise be duplicates).
      if (!findConsecutiveLoad(LD, DAG))
        --IncValue;

      SDValue Increment =
          DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
      Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);

      MachineMemOperand *ExtraMMO =
        MF.getMachineMemOperand(LD->getMemOperand(),
                                1, 2*MemVT.getStoreSize()-1);
      SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
      SDValue ExtraLoad =
        DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl,
                                DAG.getVTList(PermTy, MVT::Other),
                                ExtraLoadOps, LDTy, ExtraMMO);

      SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                               BaseLoad.getValue(1), ExtraLoad.getValue(1));

      // Because vperm has a big-endian bias, we must reverse the order
      // of the input vectors and complement the permute control vector
      // when generating little endian code. We have already handled the
      // latter by using lvsr instead of lvsl, so just reverse BaseLoad
      // and ExtraLoad here.
      SDValue Perm;
      if (isLittleEndian)
        Perm = BuildIntrinsicOp(IntrPerm,
                                ExtraLoad, BaseLoad, PermCntl, DAG, dl);
      else
        Perm = BuildIntrinsicOp(IntrPerm,
                                BaseLoad, ExtraLoad, PermCntl, DAG, dl);

      if (VT != PermTy)
        Perm = Subtarget.hasAltivec() ?
                 DAG.getNode(ISD::BITCAST, dl, VT, Perm) :
                 DAG.getNode(ISD::FP_ROUND, dl, VT, Perm, // QPX
                               DAG.getTargetConstant(1, dl, MVT::i64));
                               // second argument is 1 because this rounding
                               // is always exact.

      // The output of the permutation is our loaded result, the TokenFactor is
      // the chain result.
      DCI.CombineTo(N, Perm, TF);
      return SDValue(N, 0);
    }
    break;
  }
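  // The INTRINSIC_WO_CHAIN case below reuses an existing lvsl/lvsr permute
  // control when two addresses differ only by an aligned offset, and rewrites
  // vmaxsw/vmaxsh/vmaxsb of a value and its negation into ISD::ABS so the
  // vabsduw/h/b patterns can match later.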
13712 case ISD::INTRINSIC_WO_CHAIN
: {
13713 bool isLittleEndian
= Subtarget
.isLittleEndian();
13714 unsigned IID
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
13715 Intrinsic::ID Intr
= (isLittleEndian
? Intrinsic::ppc_altivec_lvsr
13716 : Intrinsic::ppc_altivec_lvsl
);
13717 if ((IID
== Intr
||
13718 IID
== Intrinsic::ppc_qpx_qvlpcld
||
13719 IID
== Intrinsic::ppc_qpx_qvlpcls
) &&
13720 N
->getOperand(1)->getOpcode() == ISD::ADD
) {
13721 SDValue Add
= N
->getOperand(1);
13723 int Bits
= IID
== Intrinsic::ppc_qpx_qvlpcld
?
13724 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
13726 if (DAG
.MaskedValueIsZero(Add
->getOperand(1),
13727 APInt::getAllOnesValue(Bits
/* alignment */)
13728 .zext(Add
.getScalarValueSizeInBits()))) {
13729 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13730 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13731 UE
= BasePtr
->use_end();
13733 if (UI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13734 cast
<ConstantSDNode
>(UI
->getOperand(0))->getZExtValue() == IID
) {
13735 // We've found another LVSL/LVSR, and this address is an aligned
13736 // multiple of that one. The results will be the same, so use the
13737 // one we've just found instead.
13739 return SDValue(*UI
, 0);
13744 if (isa
<ConstantSDNode
>(Add
->getOperand(1))) {
13745 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13746 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13747 UE
= BasePtr
->use_end(); UI
!= UE
; ++UI
) {
13748 if (UI
->getOpcode() == ISD::ADD
&&
13749 isa
<ConstantSDNode
>(UI
->getOperand(1)) &&
13750 (cast
<ConstantSDNode
>(Add
->getOperand(1))->getZExtValue() -
13751 cast
<ConstantSDNode
>(UI
->getOperand(1))->getZExtValue()) %
13752 (1ULL << Bits
) == 0) {
13753 SDNode
*OtherAdd
= *UI
;
13754 for (SDNode::use_iterator VI
= OtherAdd
->use_begin(),
13755 VE
= OtherAdd
->use_end(); VI
!= VE
; ++VI
) {
13756 if (VI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13757 cast
<ConstantSDNode
>(VI
->getOperand(0))->getZExtValue() == IID
) {
13758 return SDValue(*VI
, 0);
13766 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
13767 // Expose the vabsduw/h/b opportunity for down stream
13768 if (!DCI
.isAfterLegalizeDAG() && Subtarget
.hasP9Altivec() &&
13769 (IID
== Intrinsic::ppc_altivec_vmaxsw
||
13770 IID
== Intrinsic::ppc_altivec_vmaxsh
||
13771 IID
== Intrinsic::ppc_altivec_vmaxsb
)) {
13772 SDValue V1
= N
->getOperand(1);
13773 SDValue V2
= N
->getOperand(2);
13774 if ((V1
.getSimpleValueType() == MVT::v4i32
||
13775 V1
.getSimpleValueType() == MVT::v8i16
||
13776 V1
.getSimpleValueType() == MVT::v16i8
) &&
13777 V1
.getSimpleValueType() == V2
.getSimpleValueType()) {
13779 if (V1
.getOpcode() == ISD::SUB
&&
13780 ISD::isBuildVectorAllZeros(V1
.getOperand(0).getNode()) &&
13781 V1
.getOperand(1) == V2
) {
13782 return DAG
.getNode(ISD::ABS
, dl
, V2
.getValueType(), V2
);
13785 if (V2
.getOpcode() == ISD::SUB
&&
13786 ISD::isBuildVectorAllZeros(V2
.getOperand(0).getNode()) &&
13787 V2
.getOperand(1) == V1
) {
13788 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
13791 if (V1
.getOpcode() == ISD::SUB
&& V2
.getOpcode() == ISD::SUB
&&
13792 V1
.getOperand(0) == V2
.getOperand(1) &&
13793 V1
.getOperand(1) == V2
.getOperand(0)) {
13794 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
13801 case ISD::INTRINSIC_W_CHAIN
:
13802 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13803 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13804 if (Subtarget
.needsSwapsForVSXMemOps()) {
13805 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13808 case Intrinsic::ppc_vsx_lxvw4x
:
13809 case Intrinsic::ppc_vsx_lxvd2x
:
13810 return expandVSXLoadForLE(N
, DCI
);
13814 case ISD::INTRINSIC_VOID
:
13815 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13816 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13817 if (Subtarget
.needsSwapsForVSXMemOps()) {
13818 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13821 case Intrinsic::ppc_vsx_stxvw4x
:
13822 case Intrinsic::ppc_vsx_stxvd2x
:
13823 return expandVSXStoreForLE(N
, DCI
);
13828 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13829 if (ISD::isNON_EXTLoad(N
->getOperand(0).getNode()) &&
13830 N
->getOperand(0).hasOneUse() &&
13831 (N
->getValueType(0) == MVT::i32
|| N
->getValueType(0) == MVT::i16
||
13832 (Subtarget
.hasLDBRX() && Subtarget
.isPPC64() &&
13833 N
->getValueType(0) == MVT::i64
))) {
13834 SDValue Load
= N
->getOperand(0);
13835 LoadSDNode
*LD
= cast
<LoadSDNode
>(Load
);
13836 // Create the byte-swapping load.
13838 LD
->getChain(), // Chain
13839 LD
->getBasePtr(), // Ptr
13840 DAG
.getValueType(N
->getValueType(0)) // VT
13843 DAG
.getMemIntrinsicNode(PPCISD::LBRX
, dl
,
13844 DAG
.getVTList(N
->getValueType(0) == MVT::i64
?
13845 MVT::i64
: MVT::i32
, MVT::Other
),
13846 Ops
, LD
->getMemoryVT(), LD
->getMemOperand());
13848 // If this is an i16 load, insert the truncate.
13849 SDValue ResVal
= BSLoad
;
13850 if (N
->getValueType(0) == MVT::i16
)
13851 ResVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i16
, BSLoad
);
13853 // First, combine the bswap away. This makes the value produced by the
13855 DCI
.CombineTo(N
, ResVal
);
13857 // Next, combine the load away, we give it a bogus result value but a real
13858 // chain result. The result value is dead because the bswap is dead.
13859 DCI
.CombineTo(Load
.getNode(), ResVal
, BSLoad
.getValue(1));
13861 // Return N so it doesn't get rechecked!
13862 return SDValue(N
, 0);
13866 // If a VCMPo node already exists with exactly the same operands as this
13867 // node, use its result instead of this node (VCMPo computes both a CR6 and
13868 // a normal output).
13870 if (!N
->getOperand(0).hasOneUse() &&
13871 !N
->getOperand(1).hasOneUse() &&
13872 !N
->getOperand(2).hasOneUse()) {
13874 // Scan all of the users of the LHS, looking for VCMPo's that match.
13875 SDNode
*VCMPoNode
= nullptr;
13877 SDNode
*LHSN
= N
->getOperand(0).getNode();
13878 for (SDNode::use_iterator UI
= LHSN
->use_begin(), E
= LHSN
->use_end();
13880 if (UI
->getOpcode() == PPCISD::VCMPo
&&
13881 UI
->getOperand(1) == N
->getOperand(1) &&
13882 UI
->getOperand(2) == N
->getOperand(2) &&
13883 UI
->getOperand(0) == N
->getOperand(0)) {
13888 // If there is no VCMPo node, or if the flag value has a single use, don't
13890 if (!VCMPoNode
|| VCMPoNode
->hasNUsesOfValue(0, 1))
13893 // Look at the (necessarily single) use of the flag value. If it has a
13894 // chain, this transformation is more complex. Note that multiple things
13895 // could use the value result, which we should ignore.
13896 SDNode
*FlagUser
= nullptr;
13897 for (SDNode::use_iterator UI
= VCMPoNode
->use_begin();
13898 FlagUser
== nullptr; ++UI
) {
13899 assert(UI
!= VCMPoNode
->use_end() && "Didn't find user!");
13900 SDNode
*User
= *UI
;
13901 for (unsigned i
= 0, e
= User
->getNumOperands(); i
!= e
; ++i
) {
13902 if (User
->getOperand(i
) == SDValue(VCMPoNode
, 1)) {
13909 // If the user is a MFOCRF instruction, we know this is safe.
13910 // Otherwise we give up for right now.
13911 if (FlagUser
->getOpcode() == PPCISD::MFOCRF
)
13912 return SDValue(VCMPoNode
, 0);
13915 case ISD::BRCOND
: {
13916 SDValue Cond
= N
->getOperand(1);
13917 SDValue Target
= N
->getOperand(2);
13919 if (Cond
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13920 cast
<ConstantSDNode
>(Cond
.getOperand(1))->getZExtValue() ==
13921 Intrinsic::loop_decrement
) {
13923 // We now need to make the intrinsic dead (it cannot be instruction
13925 DAG
.ReplaceAllUsesOfValueWith(Cond
.getValue(1), Cond
.getOperand(0));
13926 assert(Cond
.getNode()->hasOneUse() &&
13927 "Counter decrement has more than one use");
13929 return DAG
.getNode(PPCISD::BDNZ
, dl
, MVT::Other
,
13930 N
->getOperand(0), Target
);
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFOCRF: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);

    // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
    // value.  If so, pass-through the AND to get to the intrinsic.
    if (LHS.getOpcode() == ISD::AND &&
        LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(LHS.getOperand(1)) &&
        !isNullConstant(LHS.getOperand(1)))
      LHS = LHS.getOperand(0);

    if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
        cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
          Intrinsic::loop_decrement &&
        isa<ConstantSDNode>(RHS)) {
      assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
             "Counter decrement comparison is not EQ or NE");

      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      bool isBDNZ = (CC == ISD::SETEQ && Val) ||
                    (CC == ISD::SETNE && !Val);

      // We now need to make the intrinsic dead (it cannot be instruction
      // selected).
      DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
      assert(LHS.getNode()->hasOneUse() &&
             "Counter decrement has more than one use");

      return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
                         N->getOperand(0), N->getOperand(4));
    }

    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, dl, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, dl, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  case ISD::BUILD_VECTOR:
    return DAGCombineBuildVector(N, DCI);
  case ISD::ABS:
    return combineABS(N, DCI);
  case ISD::VSELECT:
    return combineVSelect(N, DCI);
  }

  return SDValue();
}

SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || (-Divisor).isPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = (-Divisor).isPowerOf2();
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}

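// Example (sketch): with the hook above, a 64-bit "x / 8" is expected to come
// out roughly as "sradi rT, rX, 3" followed by "addze rD, rT" (the addze adds
// back the carry produced when a negative value shifts out nonzero bits), and
// "x / -8" appends a final negate.
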
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}

Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getDarwinDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9: {
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}

/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just hold 64-bit integers data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
        if (VT == MVT::v4f64 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QFRCRegClass);
        if (VT == MVT::v4f32 && Subtarget.hasQPX())
          return std::make_pair(0U, &PPC::QSRCRegClass);
      }
      break;
    case 'v':
      if (VT == MVT::v4f64 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QFRCRegClass);
      if (VT == MVT::v4f32 && Subtarget.hasQPX())
        return std::make_pair(0U, &PPC::QSRCRegClass);
      if (Subtarget.hasAltivec())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      break;
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    return std::make_pair(0U, &PPC::VSRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                            PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_lower(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }

  return R;
}

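// Example (sketch of typical uses, not taken from this file): the constraints
// resolved above appear in user code as, e.g.,
//   asm ("fadd %0, %1, %2" : "=f"(d) : "f"(a), "f"(b));          // F4RC/F8RC
//   asm ("xvadddp %x0, %x1, %x2" : "=wa"(r) : "wa"(x), "wa"(y)); // VSRC
// The class actually chosen also depends on the operand type and on the
// subtarget checks performed above (SPE, QPX, Altivec, P8 vector, ...).
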
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

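// Example (sketch): a use such as
//   asm ("addi %0, %1, %2" : "=r"(r) : "r"(x), "I"(100));
// reaches this hook with Letter == 'I'; 100 fits in a signed 16-bit field, so
// a target constant is pushed onto Ops.  A value like 100000 would fail the
// isInt<16> check, nothing would be added here, and the common inline-asm
// handling would then reject the operand.
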
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // PPC does not allow r+i addressing modes for vectors!
  if (Ty->isVectorTy() && AM.BaseOffs != 0)
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only support r+r,
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

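// Example (sketch): for a scalar load, "reg + 16" and "reg + reg" are accepted
// by the checks above, while "reg + 0x12345" (offset does not fit in a signed
// 16-bit field), a global used directly as the base, and "reg + reg + imm" are
// all rejected; for vector types any nonzero immediate offset is rejected.
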
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  bool isPPC64 = PtrVT == MVT::i64;

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until during PEI.
  unsigned FrameReg;
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo());
  return FrameAddr;
}

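// Example (sketch): __builtin_return_address(0) goes through LowerRETURNADDR
// above and becomes a load from the link-register save slot, while
// __builtin_frame_address(1) goes through LowerFRAMEADDR and becomes a copy of
// the frame-pointer register followed by one load to walk up a frame.
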
// FIXME? Maybe this could be a TableGen attribute on some registers and
// this table could be generated automatically from RegInfo.
Register PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT,
                                              const MachineFunction &MF) const {
  bool isPPC64 = Subtarget.isPPC64();
  bool IsDarwinABI = Subtarget.isDarwinABI();

  if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) ||
      (!isPPC64 && VT != MVT::i32))
    report_fatal_error("Invalid register global variable type");

  bool is64Bit = isPPC64 && VT == MVT::i64;
  Register Reg = StringSwitch<Register>(RegName)
                   .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
                   .Case("r2", (IsDarwinABI || isPPC64) ? Register() : PPC::R2)
                   .Case("r13", (!isPPC64 && IsDarwinABI) ? Register() :
                                  (is64Bit ? PPC::X13 : PPC::R13))
                   .Default(Register());

  if (Reg)
    return Reg;
  report_fatal_error("Invalid register name global variable");
}

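// Example (sketch): this hook backs named-register accesses such as
//   register unsigned long sp asm("r1");
// (i.e. llvm.read_register / llvm.write_register with metadata !"r1").  Names
// other than r1/r2/r13, or names not valid for the current ABI and word size,
// hit the fatal error above.
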
bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA) const {
  // 32-bit SVR4 ABI access everything as got-indirect.
  if (Subtarget.is32BitELFABI())
    return true;

  // AIX accesses everything indirectly through the TOC, which is similar to
  // the GOT.
  if (Subtarget.isAIXABI())
    return true;

  CodeModel::Model CModel = getTargetMachine().getCodeModel();
  // If it is small or large code model, module locals are accessed
  // indirectly by loading their address from .toc/.got.
  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
    return true;

  // JumpTable and BlockAddress are accessed as got-indirect.
  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
    return true;

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
    return Subtarget.isGVIndirectSymbol(G->getGlobal());

  return false;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::ppc_qpx_qvlfd:
  case Intrinsic::ppc_qpx_qvlfs:
  case Intrinsic::ppc_qpx_qvlfcd:
  case Intrinsic::ppc_qpx_qvlfcs:
  case Intrinsic::ppc_qpx_qvlfiwa:
  case Intrinsic::ppc_qpx_qvlfiwz:
  case Intrinsic::ppc_altivec_lvx:
  case Intrinsic::ppc_altivec_lvxl:
  case Intrinsic::ppc_altivec_lvebx:
  case Intrinsic::ppc_altivec_lvehx:
  case Intrinsic::ppc_altivec_lvewx:
  case Intrinsic::ppc_vsx_lxvd2x:
  case Intrinsic::ppc_vsx_lxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_lvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_lvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_lvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_lxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align::None();
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvlfda:
  case Intrinsic::ppc_qpx_qvlfsa:
  case Intrinsic::ppc_qpx_qvlfcda:
  case Intrinsic::ppc_qpx_qvlfcsa:
  case Intrinsic::ppc_qpx_qvlfiwaa:
  case Intrinsic::ppc_qpx_qvlfiwza: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvlfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvlfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvlfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvlfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4f64;
      break;
    }

    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = Align::None();
    Info.flags = MachineMemOperand::MOLoad;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfd:
  case Intrinsic::ppc_qpx_qvstfs:
  case Intrinsic::ppc_qpx_qvstfcd:
  case Intrinsic::ppc_qpx_qvstfcs:
  case Intrinsic::ppc_qpx_qvstfiw:
  case Intrinsic::ppc_altivec_stvx:
  case Intrinsic::ppc_altivec_stvxl:
  case Intrinsic::ppc_altivec_stvebx:
  case Intrinsic::ppc_altivec_stvehx:
  case Intrinsic::ppc_altivec_stvewx:
  case Intrinsic::ppc_vsx_stxvd2x:
  case Intrinsic::ppc_vsx_stxvw4x: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_altivec_stvebx:
      VT = MVT::i8;
      break;
    case Intrinsic::ppc_altivec_stvehx:
      VT = MVT::i16;
      break;
    case Intrinsic::ppc_altivec_stvewx:
      VT = MVT::i32;
      break;
    case Intrinsic::ppc_vsx_stxvd2x:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfd:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfs:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcd:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcs:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4i32;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = -VT.getStoreSize()+1;
    Info.size = 2*VT.getStoreSize()-1;
    Info.align = Align::None();
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  case Intrinsic::ppc_qpx_qvstfda:
  case Intrinsic::ppc_qpx_qvstfsa:
  case Intrinsic::ppc_qpx_qvstfcda:
  case Intrinsic::ppc_qpx_qvstfcsa:
  case Intrinsic::ppc_qpx_qvstfiwa: {
    EVT VT;
    switch (Intrinsic) {
    case Intrinsic::ppc_qpx_qvstfda:
      VT = MVT::v4f64;
      break;
    case Intrinsic::ppc_qpx_qvstfsa:
      VT = MVT::v4f32;
      break;
    case Intrinsic::ppc_qpx_qvstfcda:
      VT = MVT::v2f64;
      break;
    case Intrinsic::ppc_qpx_qvstfcsa:
      VT = MVT::v2f32;
      break;
    default:
      VT = MVT::v4f64;
      break;
    }

    Info.opc = ISD::INTRINSIC_VOID;
    Info.memVT = VT;
    Info.ptrVal = I.getArgOperand(1);
    Info.offset = 0;
    Info.size = VT.getStoreSize();
    Info.align = Align::None();
    Info.flags = MachineMemOperand::MOStore;
    return true;
  }
  default:
    break;
  }

  return false;
}

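// Example (sketch): for an Altivec load builtin such as
//   vector int v = vec_ld(0, p);            // -> @llvm.ppc.altivec.lvx
// the memory info recorded above spans (-size + 1) .. (size - 1) around the
// pointer, i.e. 31 bytes for a 16-byte vector, because lvx ignores the low
// address bits and may therefore touch any part of the containing 16-byte
// block.
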
/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero that means it's safe because the destination
/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
/// means there isn't a need to check it against alignment requirement,
/// probably because the source does not need to be loaded. If 'IsMemset' is
/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
/// source is constant so it does not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(
    uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset,
    bool ZeroMemset, bool MemcpyStrSrc,
    const AttributeList &FuncAttributes) const {
  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
    // When expanding a memset, require at least two QPX instructions to cover
    // the cost of loading the value to be stored from the constant pool.
    if (Subtarget.hasQPX() && Size >= 32 && (!IsMemset || Size >= 64) &&
        (!SrcAlign || SrcAlign >= 32) && (!DstAlign || DstAlign >= 32) &&
        !FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
      return MVT::v4f64;
    }

    // We should use Altivec/VSX loads and stores when available. For unaligned
    // addresses, unaligned VSX loads are only fast starting with the P8.
    if (Subtarget.hasAltivec() && Size >= 16 &&
        (((!SrcAlign || SrcAlign >= 16) && (!DstAlign || DstAlign >= 16)) ||
         ((IsMemset && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
      return MVT::v4i32;
  }

  if (Subtarget.isPPC64()) {
    return MVT::i64;
  }

  return MVT::i32;
}

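// Example (sketch): at -O2 on a subtarget with Altivec/VSX, a 32-byte memcpy
// with 16-byte-aligned operands is expanded using v4i32 chunks; a plain 64-bit
// subtarget falls back to i64 chunks and a 32-bit one to i32 (subject to the
// generic expansion limits).
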
/// Returns true if it is beneficial to convert a load of a constant
/// to just the constant itself.
bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                                          Type *Ty) const {
  assert(Ty->isIntegerTy());

  unsigned BitSize = Ty->getPrimitiveSizeInBits();
  return !(BitSize == 0 || BitSize > 64);
}

bool PPCTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 64 && NumBits2 == 32;
}

bool PPCTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Generally speaking, zexts are not free, but they are free when they can be
  // folded with other operations.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  // FIXME: Add other cases...
  //  - 32-bit shifts with a zext to i64
  //  - zext after ctlz, bswap, etc.
  //  - zext after and by a constant mask

  return TargetLowering::isZExtFree(Val, VT2);
}

bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
         "invalid fpext types");
  // Extending to float128 is not free.
  if (DestVT == MVT::f128)
    return false;
  return true;
}

bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<16>(Imm) || isUInt<16>(Imm);
}

bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                       unsigned,
                                                       unsigned,
                                                       MachineMemOperand::Flags,
                                                       bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally only traps for software emulation when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector()) {
    if (Subtarget.hasVSX()) {
      if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
          VT != MVT::v4f32 && VT != MVT::v4i32)
        return false;
    } else {
      return false;
    }
  }

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  case MVT::f128:
    return (EnableQuadPrecision && Subtarget.hasP9Vector());
  default:
    break;
  }

  return false;
}

const MCPhysReg *
PPCTargetLowering::getScratchRegisters(CallingConv::ID) const {
  // LR is a callee-save register, but we must treat it as clobbered by any call
  // site. Hence we include LR in the scratch registers, which are in turn added
  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
  // to CTR, which is used by any indirect call.
  static const MCPhysReg ScratchRegs[] = {
    PPC::X12, PPC::LR8, PPC::CTR8, 0
  };

  return ScratchRegs;
}

unsigned PPCTargetLowering::getExceptionPointerRegister(
  const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
}

unsigned PPCTargetLowering::getExceptionSelectorRegister(
  const Constant *PersonalityFn) const {
  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
}

bool
PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
  EVT VT, unsigned DefinedValues) const {
  if (VT == MVT::v2i64)
    return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves

  if (Subtarget.hasVSX() || Subtarget.hasQPX())
    return true;

  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref || Subtarget.enableMachineScheduler())
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

// Create a fast isel object.
FastISel *
PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo,
                                  const TargetLibraryInfo *LibInfo) const {
  return PPC::createFastISel(FuncInfo, LibInfo);
}

void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (Subtarget.isDarwinABI()) return;
  if (!Subtarget.isPPC64()) return;

  // Update IsSplitCSR in PPCFunctionInfo
  PPCFunctionInfo *PFI = Entry->getParent()->getInfo<PPCFunctionInfo>();
  PFI->setIsSplitCSR(true);
}

void PPCTargetLowering::insertCopiesSplitCSR(
  MachineBasicBlock *Entry,
  const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (PPC::G8RCRegClass.contains(*I))
      RC = &PPC::G8RCRegClass;
    else if (PPC::F8RCRegClass.contains(*I))
      RC = &PPC::F8RCRegClass;
    else if (PPC::CRRCRegClass.contains(*I))
      RC = &PPC::CRRCRegClass;
    else if (PPC::VRRCRegClass.contains(*I))
      RC = &PPC::VRRCRegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create copy from CSR to a virtual register.
    // FIXME: this currently does not emit CFI pseudo-instructions, it works
    // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
    // nounwind. If we want to generalize this later, we may need to emit
    // CFI pseudo-instructions.
    assert(Entry->getParent()->getFunction().hasFnAttribute(
             Attribute::NoUnwind) &&
           "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before the terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}

// Override to enable LOAD_STACK_GUARD lowering on Linux.
bool PPCTargetLowering::useLoadStackGuardNode() const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::useLoadStackGuardNode();

  return true;
}

// Override to disable global variable loading on Linux.
void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
  if (!Subtarget.isTargetLinux())
    return TargetLowering::insertSSPDeclarations(M);
}

bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                     bool ForCodeSize) const {
  if (!VT.isSimple() || !Subtarget.hasVSX())
    return false;

  switch(VT.getSimpleVT().SimpleTy) {
  default:
    // For FP types that are currently not supported by PPC backend, return
    // false. Examples: f16, f80.
    return false;
  case MVT::f32:
  case MVT::f64:
  case MVT::f128:
    return Imm.isPosZero();
  }
}

// For vector shift operation op, fold
// (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N,
                                  SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();
  unsigned Opcode = N->getOpcode();
  unsigned TargetOpcode;

  switch (Opcode) {
  default:
    llvm_unreachable("Unexpected shift operation");
  case ISD::SHL:
    TargetOpcode = PPCISD::SHL;
    break;
  case ISD::SRL:
    TargetOpcode = PPCISD::SRL;
    break;
  case ISD::SRA:
    TargetOpcode = PPCISD::SRA;
    break;
  }

  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
      N1->getOpcode() == ISD::AND)
    if (ConstantSDNode *Mask = isConstOrConstSplat(N1->getOperand(1)))
      if (Mask->getZExtValue() == OpSizeInBits - 1)
        return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));

  return SDValue();
}

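// Example (sketch): for a v4i32 shift, a DAG of the form
//   (shl x, (and y, <31,31,31,31>))
// becomes (PPCISD::SHL x, y); the mask is redundant because the vector shift
// instructions already interpret each shift amount modulo the element width.
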
SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  SDValue N0 = N->getOperand(0);
  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!Subtarget.isISA3_0() ||
      N0.getOpcode() != ISD::SIGN_EXTEND ||
      N0.getOperand(0).getValueType() != MVT::i32 ||
      CN1 == nullptr || N->getValueType(0) != MVT::i64)
    return SDValue();

  // We can't save an operation here if the value is already extended, and
  // the existing shift is easier to combine.
  SDValue ExtsSrc = N0.getOperand(0);
  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
      ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
    return SDValue();

  SDLoc DL(N0);
  SDValue ShiftBy = SDValue(CN1, 0);
  // We want the shift amount to be i32 on the extswli, but the shift could
  // have an i64 shift amount.
  if (ShiftBy.getValueType() == MVT::i64)
    ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);

  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
                         ShiftBy);
}

SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
    return Value;

  return SDValue();
}

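// Example (sketch): on an ISA 3.0 subtarget, combineSHL above turns
//   (shl (sign_extend i32 %a to i64), 4)
// into a single PPCISD::EXTSWSLI node, i.e. one extswsli instruction instead
// of an extsw followed by a separate 64-bit shift.
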
// Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
// Transform (add X, (zext(sete  Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
// When C is zero, the equation (addi Z, -C) can be simplified to Z
// Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG,
                                 const PPCSubtarget &Subtarget) {
  if (!Subtarget.isPPC64())
    return SDValue();

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  auto isZextOfCompareWithConstant = [](SDValue Op) {
    if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
        Op.getValueType() != MVT::i64)
      return false;

    SDValue Cmp = Op.getOperand(0);
    if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
        Cmp.getOperand(0).getValueType() != MVT::i64)
      return false;

    if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
      int64_t NegConstant = 0 - Constant->getSExtValue();
      // Due to the limitations of the addi instruction,
      // -C is required to be [-32768, 32767].
      return isInt<16>(NegConstant);
    }

    return false;
  };

  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);

  // If there is a pattern, canonicalize a zext operand to the RHS.
  if (LHSHasPattern && !RHSHasPattern)
    std::swap(LHS, RHS);
  else if (!LHSHasPattern && !RHSHasPattern)
    return SDValue();

  SDLoc DL(N);
  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
  SDValue Cmp = RHS.getOperand(0);
  SDValue Z = Cmp.getOperand(0);
  auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1));

  assert(Constant && "Constant Should not be a null pointer.");
  int64_t NegConstant = 0 - Constant->getSExtValue();

  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
  default: break;
  case ISD::SETNE: {
    //                                 when C == 0
    //                             --> addze X, (addic Z, -1).carry
    //                            /
    // add X, (zext(setne Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (addic (addi Z, -C), -1).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Addc.getNode(), 1));
  }
  case ISD::SETEQ: {
    //                                 when C == 0
    //                             --> addze X, (subfic Z, 0).carry
    //                            /
    // add X, (zext(sete  Z, C))--
    //                            \    when -32768 <= -C <= 32767 && C != 0
    //                             --> addze X, (subfic (addi Z, -C), 0).carry
    SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
                              DAG.getConstant(NegConstant, DL, MVT::i64));
    SDValue AddOrZ = NegConstant != 0 ? Add : Z;
    SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
                               DAG.getConstant(0, DL, MVT::i64), AddOrZ);
    return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
                       SDValue(Subc.getNode(), 1));
  }
  }

  return SDValue();
}

SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
    return Value;

  return SDValue();
}

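// Example (sketch): for i64 values, "X + (Z != 0)" is rewritten by
// combineADDToADDZE into roughly "addic t, Z, -1 ; addze r, X": the carry out
// of the addic is exactly (Z != 0), so no separate setcc materialization is
// needed.
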
// Detect TRUNCATE operations on bitcasts of float128 values.
// What we are looking for here is the situation where we extract a subset
// of bits from a 128 bit float.
// This can be of two forms:
// 1) BITCAST of f128 feeding TRUNCATE
// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
// The reason this is required is because we do not have a legal i128 type
// and so we want to prevent having to store the f128 and then reload part
// of it.
SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
                                           DAGCombinerInfo &DCI) const {
  // If we are using CRBits then try that first.
  if (Subtarget.useCRBits()) {
    // Check if CRBits did anything and return that if it did.
    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
      return CRTruncValue;
  }

  SDLoc dl(N);
  SDValue Op0 = N->getOperand(0);

  // Looking for a truncate of i128 to i64.
  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
    return SDValue();

  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;

  // SRL feeding TRUNCATE.
  if (Op0.getOpcode() == ISD::SRL) {
    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
    // The right shift has to be by 64 bits.
    if (!ConstNode || ConstNode->getZExtValue() != 64)
      return SDValue();

    // Switch the element number to extract.
    EltToExtract = EltToExtract ? 0 : 1;
    // Update Op0 past the SRL.
    Op0 = Op0.getOperand(0);
  }

  // BITCAST feeding a TRUNCATE possibly via SRL.
  if (Op0.getOpcode() == ISD::BITCAST &&
      Op0.getValueType() == MVT::i128 &&
      Op0.getOperand(0).getValueType() == MVT::f128) {
    SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
    return DCI.DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
        DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
  }
  return SDValue();
}

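// Example (sketch): IR along the lines of
//   %i  = bitcast fp128 %x to i128
//   %s  = lshr i128 %i, 64
//   %hi = trunc i128 %s to i64
// is handled above by bitcasting the fp128 value to v2i64 and extracting one
// element directly, avoiding a store and partial reload of the 128-bit value.
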
SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
  if (!ConstOpOrElement)
    return SDValue();

  // An imul is usually smaller than the alternative sequence for legal type.
  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
      isOperationLegal(ISD::MUL, N->getValueType(0)))
    return SDValue();

  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
    switch (this->Subtarget.getDarwinDirective()) {
    default:
      // TODO: enhance the condition for subtarget before pwr8
      return false;
    case PPC::DIR_PWR8:
      //  type        mul     add    shl
      return true;
    case PPC::DIR_PWR9:
      //  type        mul     add    shl
      // scalar        5       2      2
      // vector        7       2      2

      // The cycle RATIO of related operations is shown in the table above.
      // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
      // scalar and vector type. For 2 instrs patterns, add/sub + shl
      // are 4, it is always profitable; but for 3 instrs patterns
      // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
      // So we should only do it for vector type.
      return IsAddOne && IsNeg ? VT.isVector() : true;
    }
  };

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
  bool IsNeg = MulAmt.isNegative();
  APInt MulAmtAbs = MulAmt.abs();

  if ((MulAmtAbs - 1).isPowerOf2()) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)

    if (!IsProfitable(IsNeg, true, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
    SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);

    if (!IsNeg)
      return Res;

    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
  } else if ((MulAmtAbs + 1).isPowerOf2()) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    // (mul x, -(2^N - 1)) => (sub x, (shl x, N))

    if (!IsProfitable(IsNeg, false, VT))
      return SDValue();

    SDValue Op0 = N->getOperand(0);
    SDValue Op1 =
        DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                    DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));

    if (!IsNeg)
      return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
    else
      return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
  } else {
    return SDValue();
  }
}

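// Example (sketch): when profitable for the subtarget, "x * 5" (5 == 2^2 + 1)
// becomes (add (shl x, 2), x) and "x * 7" (7 == 2^3 - 1) becomes
// (sub (shl x, 3), x); the negative multipliers use the negated/commuted
// subtract forms shown in the comments above.
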
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
  if (!Subtarget.is64BitELFABI())
    return false;

  // If not a tail call then no need to proceed.
  if (!CI->isTailCall())
    return false;

  // If tail calls are disabled for the caller then we are done.
  const Function *Caller = CI->getParent()->getParent();
  auto Attr = Caller->getFnAttribute("disable-tail-calls");
  if (Attr.getValueAsString() == "true")
    return false;

  // If sibling calls have been disabled and tail-calls aren't guaranteed
  // there is no reason to duplicate.
  auto &TM = getTargetMachine();
  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
    return false;

  // Can't tail call a function called indirectly, or if it has variadic args.
  const Function *Callee = CI->getCalledFunction();
  if (!Callee || Callee->isVarArg())
    return false;

  // Make sure the callee and caller calling conventions are eligible for tco.
  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
                                           CI->getCallingConv()))
    return false;

  // If the function is local then we have a good chance at tail-calling it
  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
}

bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  if (!Subtarget.hasVSX())
    return false;
  if (Subtarget.hasP9Vector() && VT == MVT::f128)
    return true;
  return VT == MVT::f32 || VT == MVT::f64 ||
         VT == MVT::v4f32 || VT == MVT::v2f64;
}

bool PPCTargetLowering::
isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
  const Value *Mask = AndI.getOperand(1);
  // If the mask is suitable for andi. or andis. we should sink the and.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
    // Can't handle constants wider than 64-bits.
    if (CI->getBitWidth() > 64)
      return false;
    int64_t ConstVal = CI->getZExtValue();
    return isUInt<16>(ConstVal) ||
           (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
  }

  // For non-constant masks, we can always use the record-form and.
  return true;
}

// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}

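// Example (sketch): with P9 Altivec, a subtract of two zero-extended vectors
// fed into ISD::ABS is selected through the VABSD node above into a single
// vabsdu[bhw] absolute-difference instruction, instead of a widen, subtract
// and abs sequence; the signed v4i32 case uses the xvnegsp + vabsduw trick
// noted in the code.
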
// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));