1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the PPCISelLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetRegisterInfo.h"
56 #include "llvm/CodeGen/ValueTypes.h"
57 #include "llvm/IR/CallSite.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCContext.h"
74 #include "llvm/MC/MCExpr.h"
75 #include "llvm/MC/MCRegisterInfo.h"
76 #include "llvm/MC/MCSymbolXCOFF.h"
77 #include "llvm/Support/AtomicOrdering.h"
78 #include "llvm/Support/BranchProbability.h"
79 #include "llvm/Support/Casting.h"
80 #include "llvm/Support/CodeGen.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Compiler.h"
83 #include "llvm/Support/Debug.h"
84 #include "llvm/Support/ErrorHandling.h"
85 #include "llvm/Support/Format.h"
86 #include "llvm/Support/KnownBits.h"
87 #include "llvm/Support/MachineValueType.h"
88 #include "llvm/Support/MathExtras.h"
89 #include "llvm/Support/raw_ostream.h"
90 #include "llvm/Target/TargetMachine.h"
91 #include "llvm/Target/TargetOptions.h"
100 using namespace llvm
;
102 #define DEBUG_TYPE "ppc-lowering"
104 static cl::opt
<bool> DisablePPCPreinc("disable-ppc-preinc",
105 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden
);
107 static cl::opt
<bool> DisableILPPref("disable-ppc-ilp-pref",
108 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden
);
110 static cl::opt
<bool> DisablePPCUnaligned("disable-ppc-unaligned",
111 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden
);
113 static cl::opt
<bool> DisableSCO("disable-ppc-sco",
114 cl::desc("disable sibling call optimization on ppc"), cl::Hidden
);
116 static cl::opt
<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
117 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden
);
119 static cl::opt
<bool> EnableQuadPrecision("enable-ppc-quad-precision",
120 cl::desc("enable quad precision float support on ppc"), cl::Hidden
);
122 STATISTIC(NumTailCalls
, "Number of tail calls");
123 STATISTIC(NumSiblingCalls
, "Number of sibling calls");
125 static bool isNByteElemShuffleMask(ShuffleVectorSDNode
*, unsigned, int);
127 static SDValue
widenVec(SelectionDAG
&DAG
, SDValue Vec
, const SDLoc
&dl
);
129 // FIXME: Remove this once the bug has been fixed!
130 extern cl::opt
<bool> ANDIGlueBug
;
132 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine
&TM
,
133 const PPCSubtarget
&STI
)
134 : TargetLowering(TM
), Subtarget(STI
) {
135 // Use _setjmp/_longjmp instead of setjmp/longjmp.
136 setUseUnderscoreSetJmp(true);
137 setUseUnderscoreLongJmp(true);
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64
= Subtarget
.isPPC64();
142 setMinStackArgumentAlignment(isPPC64
? llvm::Align(8) : llvm::Align(4));
144 // Set up the register classes.
145 addRegisterClass(MVT::i32
, &PPC::GPRCRegClass
);
146 if (!useSoftFloat()) {
148 addRegisterClass(MVT::f32
, &PPC::SPE4RCRegClass
);
149 addRegisterClass(MVT::f64
, &PPC::SPERCRegClass
);
151 addRegisterClass(MVT::f32
, &PPC::F4RCRegClass
);
152 addRegisterClass(MVT::f64
, &PPC::F8RCRegClass
);
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE
, MVT::i32
, Legal
);
158 setOperationAction(ISD::BITREVERSE
, MVT::i64
, Legal
);
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP
, MVT::i32
, Custom
);
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT
: MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i1
, Promote
);
166 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i8
, Expand
);
169 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
171 // PowerPC has pre-inc load and store's.
172 setIndexedLoadAction(ISD::PRE_INC
, MVT::i1
, Legal
);
173 setIndexedLoadAction(ISD::PRE_INC
, MVT::i8
, Legal
);
174 setIndexedLoadAction(ISD::PRE_INC
, MVT::i16
, Legal
);
175 setIndexedLoadAction(ISD::PRE_INC
, MVT::i32
, Legal
);
176 setIndexedLoadAction(ISD::PRE_INC
, MVT::i64
, Legal
);
177 setIndexedStoreAction(ISD::PRE_INC
, MVT::i1
, Legal
);
178 setIndexedStoreAction(ISD::PRE_INC
, MVT::i8
, Legal
);
179 setIndexedStoreAction(ISD::PRE_INC
, MVT::i16
, Legal
);
180 setIndexedStoreAction(ISD::PRE_INC
, MVT::i32
, Legal
);
181 setIndexedStoreAction(ISD::PRE_INC
, MVT::i64
, Legal
);
182 if (!Subtarget
.hasSPE()) {
183 setIndexedLoadAction(ISD::PRE_INC
, MVT::f32
, Legal
);
184 setIndexedLoadAction(ISD::PRE_INC
, MVT::f64
, Legal
);
185 setIndexedStoreAction(ISD::PRE_INC
, MVT::f32
, Legal
);
186 setIndexedStoreAction(ISD::PRE_INC
, MVT::f64
, Legal
);
189 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
190 const MVT ScalarIntVTs
[] = { MVT::i32
, MVT::i64
};
191 for (MVT VT
: ScalarIntVTs
) {
192 setOperationAction(ISD::ADDC
, VT
, Legal
);
193 setOperationAction(ISD::ADDE
, VT
, Legal
);
194 setOperationAction(ISD::SUBC
, VT
, Legal
);
195 setOperationAction(ISD::SUBE
, VT
, Legal
);
198 if (Subtarget
.useCRBits()) {
199 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
201 if (isPPC64
|| Subtarget
.hasFPCVT()) {
202 setOperationAction(ISD::SINT_TO_FP
, MVT::i1
, Promote
);
203 AddPromotedToType (ISD::SINT_TO_FP
, MVT::i1
,
204 isPPC64
? MVT::i64
: MVT::i32
);
205 setOperationAction(ISD::UINT_TO_FP
, MVT::i1
, Promote
);
206 AddPromotedToType(ISD::UINT_TO_FP
, MVT::i1
,
207 isPPC64
? MVT::i64
: MVT::i32
);
209 setOperationAction(ISD::SINT_TO_FP
, MVT::i1
, Custom
);
210 setOperationAction(ISD::UINT_TO_FP
, MVT::i1
, Custom
);
213 // PowerPC does not support direct load/store of condition registers.
214 setOperationAction(ISD::LOAD
, MVT::i1
, Custom
);
215 setOperationAction(ISD::STORE
, MVT::i1
, Custom
);
217 // FIXME: Remove this once the ANDI glue bug is fixed:
219 setOperationAction(ISD::TRUNCATE
, MVT::i1
, Custom
);
221 for (MVT VT
: MVT::integer_valuetypes()) {
222 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i1
, Promote
);
223 setLoadExtAction(ISD::ZEXTLOAD
, VT
, MVT::i1
, Promote
);
224 setTruncStoreAction(VT
, MVT::i1
, Expand
);
227 addRegisterClass(MVT::i1
, &PPC::CRBITRCRegClass
);
230 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
231 // PPC (the libcall is not available).
232 setOperationAction(ISD::FP_TO_SINT
, MVT::ppcf128
, Custom
);
233 setOperationAction(ISD::FP_TO_UINT
, MVT::ppcf128
, Custom
);
235 // We do not currently implement these libm ops for PowerPC.
236 setOperationAction(ISD::FFLOOR
, MVT::ppcf128
, Expand
);
237 setOperationAction(ISD::FCEIL
, MVT::ppcf128
, Expand
);
238 setOperationAction(ISD::FTRUNC
, MVT::ppcf128
, Expand
);
239 setOperationAction(ISD::FRINT
, MVT::ppcf128
, Expand
);
240 setOperationAction(ISD::FNEARBYINT
, MVT::ppcf128
, Expand
);
241 setOperationAction(ISD::FREM
, MVT::ppcf128
, Expand
);
243 // PowerPC has no SREM/UREM instructions unless we are on P9
244 // On P9 we may use a hardware instruction to compute the remainder.
245 // The instructions are not legalized directly because in the cases where the
246 // result of both the remainder and the division is required it is more
247 // efficient to compute the remainder from the result of the division rather
248 // than use the remainder instruction.
249 if (Subtarget
.isISA3_0()) {
250 setOperationAction(ISD::SREM
, MVT::i32
, Custom
);
251 setOperationAction(ISD::UREM
, MVT::i32
, Custom
);
252 setOperationAction(ISD::SREM
, MVT::i64
, Custom
);
253 setOperationAction(ISD::UREM
, MVT::i64
, Custom
);
255 setOperationAction(ISD::SREM
, MVT::i32
, Expand
);
256 setOperationAction(ISD::UREM
, MVT::i32
, Expand
);
257 setOperationAction(ISD::SREM
, MVT::i64
, Expand
);
258 setOperationAction(ISD::UREM
, MVT::i64
, Expand
);
261 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
262 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Expand
);
263 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Expand
);
264 setOperationAction(ISD::UMUL_LOHI
, MVT::i64
, Expand
);
265 setOperationAction(ISD::SMUL_LOHI
, MVT::i64
, Expand
);
266 setOperationAction(ISD::UDIVREM
, MVT::i32
, Expand
);
267 setOperationAction(ISD::SDIVREM
, MVT::i32
, Expand
);
268 setOperationAction(ISD::UDIVREM
, MVT::i64
, Expand
);
269 setOperationAction(ISD::SDIVREM
, MVT::i64
, Expand
);
271 // We don't support sin/cos/sqrt/fmod/pow
272 setOperationAction(ISD::FSIN
, MVT::f64
, Expand
);
273 setOperationAction(ISD::FCOS
, MVT::f64
, Expand
);
274 setOperationAction(ISD::FSINCOS
, MVT::f64
, Expand
);
275 setOperationAction(ISD::FREM
, MVT::f64
, Expand
);
276 setOperationAction(ISD::FPOW
, MVT::f64
, Expand
);
277 setOperationAction(ISD::FSIN
, MVT::f32
, Expand
);
278 setOperationAction(ISD::FCOS
, MVT::f32
, Expand
);
279 setOperationAction(ISD::FSINCOS
, MVT::f32
, Expand
);
280 setOperationAction(ISD::FREM
, MVT::f32
, Expand
);
281 setOperationAction(ISD::FPOW
, MVT::f32
, Expand
);
282 if (Subtarget
.hasSPE()) {
283 setOperationAction(ISD::FMA
, MVT::f64
, Expand
);
284 setOperationAction(ISD::FMA
, MVT::f32
, Expand
);
286 setOperationAction(ISD::FMA
, MVT::f64
, Legal
);
287 setOperationAction(ISD::FMA
, MVT::f32
, Legal
);
290 setOperationAction(ISD::FLT_ROUNDS_
, MVT::i32
, Custom
);
292 // If we're enabling GP optimizations, use hardware square root
293 if (!Subtarget
.hasFSQRT() &&
294 !(TM
.Options
.UnsafeFPMath
&& Subtarget
.hasFRSQRTE() &&
296 setOperationAction(ISD::FSQRT
, MVT::f64
, Expand
);
298 if (!Subtarget
.hasFSQRT() &&
299 !(TM
.Options
.UnsafeFPMath
&& Subtarget
.hasFRSQRTES() &&
300 Subtarget
.hasFRES()))
301 setOperationAction(ISD::FSQRT
, MVT::f32
, Expand
);
303 if (Subtarget
.hasFCPSGN()) {
304 setOperationAction(ISD::FCOPYSIGN
, MVT::f64
, Legal
);
305 setOperationAction(ISD::FCOPYSIGN
, MVT::f32
, Legal
);
307 setOperationAction(ISD::FCOPYSIGN
, MVT::f64
, Expand
);
308 setOperationAction(ISD::FCOPYSIGN
, MVT::f32
, Expand
);
311 if (Subtarget
.hasFPRND()) {
312 setOperationAction(ISD::FFLOOR
, MVT::f64
, Legal
);
313 setOperationAction(ISD::FCEIL
, MVT::f64
, Legal
);
314 setOperationAction(ISD::FTRUNC
, MVT::f64
, Legal
);
315 setOperationAction(ISD::FROUND
, MVT::f64
, Legal
);
317 setOperationAction(ISD::FFLOOR
, MVT::f32
, Legal
);
318 setOperationAction(ISD::FCEIL
, MVT::f32
, Legal
);
319 setOperationAction(ISD::FTRUNC
, MVT::f32
, Legal
);
320 setOperationAction(ISD::FROUND
, MVT::f32
, Legal
);
323 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
324 // to speed up scalar BSWAP64.
325 // CTPOP or CTTZ were introduced in P8/P9 respectively
326 setOperationAction(ISD::BSWAP
, MVT::i32
, Expand
);
327 if (Subtarget
.hasP9Vector())
328 setOperationAction(ISD::BSWAP
, MVT::i64
, Custom
);
330 setOperationAction(ISD::BSWAP
, MVT::i64
, Expand
);
331 if (Subtarget
.isISA3_0()) {
332 setOperationAction(ISD::CTTZ
, MVT::i32
, Legal
);
333 setOperationAction(ISD::CTTZ
, MVT::i64
, Legal
);
335 setOperationAction(ISD::CTTZ
, MVT::i32
, Expand
);
336 setOperationAction(ISD::CTTZ
, MVT::i64
, Expand
);
339 if (Subtarget
.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast
) {
340 setOperationAction(ISD::CTPOP
, MVT::i32
, Legal
);
341 setOperationAction(ISD::CTPOP
, MVT::i64
, Legal
);
343 setOperationAction(ISD::CTPOP
, MVT::i32
, Expand
);
344 setOperationAction(ISD::CTPOP
, MVT::i64
, Expand
);
347 // PowerPC does not have ROTR
348 setOperationAction(ISD::ROTR
, MVT::i32
, Expand
);
349 setOperationAction(ISD::ROTR
, MVT::i64
, Expand
);
351 if (!Subtarget
.useCRBits()) {
352 // PowerPC does not have Select
353 setOperationAction(ISD::SELECT
, MVT::i32
, Expand
);
354 setOperationAction(ISD::SELECT
, MVT::i64
, Expand
);
355 setOperationAction(ISD::SELECT
, MVT::f32
, Expand
);
356 setOperationAction(ISD::SELECT
, MVT::f64
, Expand
);
359 // PowerPC wants to turn select_cc of FP into fsel when possible.
360 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
361 setOperationAction(ISD::SELECT_CC
, MVT::f64
, Custom
);
363 // PowerPC wants to optimize integer setcc a bit
364 if (!Subtarget
.useCRBits())
365 setOperationAction(ISD::SETCC
, MVT::i32
, Custom
);
367 // PowerPC does not have BRCOND which requires SetCC
368 if (!Subtarget
.useCRBits())
369 setOperationAction(ISD::BRCOND
, MVT::Other
, Expand
);
371 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
373 if (Subtarget
.hasSPE()) {
374 // SPE has built-in conversions
375 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Legal
);
376 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Legal
);
377 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Legal
);
379 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
380 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Custom
);
382 // PowerPC does not have [U|S]INT_TO_FP
383 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Expand
);
384 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Expand
);
387 if (Subtarget
.hasDirectMove() && isPPC64
) {
388 setOperationAction(ISD::BITCAST
, MVT::f32
, Legal
);
389 setOperationAction(ISD::BITCAST
, MVT::i32
, Legal
);
390 setOperationAction(ISD::BITCAST
, MVT::i64
, Legal
);
391 setOperationAction(ISD::BITCAST
, MVT::f64
, Legal
);
393 setOperationAction(ISD::BITCAST
, MVT::f32
, Expand
);
394 setOperationAction(ISD::BITCAST
, MVT::i32
, Expand
);
395 setOperationAction(ISD::BITCAST
, MVT::i64
, Expand
);
396 setOperationAction(ISD::BITCAST
, MVT::f64
, Expand
);
399 // We cannot sextinreg(i1). Expand to shifts.
400 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
402 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
403 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
404 // support continuation, user-level threading, and etc.. As a result, no
405 // other SjLj exception interfaces are implemented and please don't build
406 // your own exception handling based on them.
407 // LLVM/Clang supports zero-cost DWARF exception handling.
408 setOperationAction(ISD::EH_SJLJ_SETJMP
, MVT::i32
, Custom
);
409 setOperationAction(ISD::EH_SJLJ_LONGJMP
, MVT::Other
, Custom
);
411 // We want to legalize GlobalAddress and ConstantPool nodes into the
412 // appropriate instructions to materialize the address.
413 setOperationAction(ISD::GlobalAddress
, MVT::i32
, Custom
);
414 setOperationAction(ISD::GlobalTLSAddress
, MVT::i32
, Custom
);
415 setOperationAction(ISD::BlockAddress
, MVT::i32
, Custom
);
416 setOperationAction(ISD::ConstantPool
, MVT::i32
, Custom
);
417 setOperationAction(ISD::JumpTable
, MVT::i32
, Custom
);
418 setOperationAction(ISD::GlobalAddress
, MVT::i64
, Custom
);
419 setOperationAction(ISD::GlobalTLSAddress
, MVT::i64
, Custom
);
420 setOperationAction(ISD::BlockAddress
, MVT::i64
, Custom
);
421 setOperationAction(ISD::ConstantPool
, MVT::i64
, Custom
);
422 setOperationAction(ISD::JumpTable
, MVT::i64
, Custom
);
425 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
427 // TRAMPOLINE is custom lowered.
428 setOperationAction(ISD::INIT_TRAMPOLINE
, MVT::Other
, Custom
);
429 setOperationAction(ISD::ADJUST_TRAMPOLINE
, MVT::Other
, Custom
);
431 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
432 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
434 if (Subtarget
.is64BitELFABI()) {
435 // VAARG always uses double-word chunks, so promote anything smaller.
436 setOperationAction(ISD::VAARG
, MVT::i1
, Promote
);
437 AddPromotedToType(ISD::VAARG
, MVT::i1
, MVT::i64
);
438 setOperationAction(ISD::VAARG
, MVT::i8
, Promote
);
439 AddPromotedToType(ISD::VAARG
, MVT::i8
, MVT::i64
);
440 setOperationAction(ISD::VAARG
, MVT::i16
, Promote
);
441 AddPromotedToType(ISD::VAARG
, MVT::i16
, MVT::i64
);
442 setOperationAction(ISD::VAARG
, MVT::i32
, Promote
);
443 AddPromotedToType(ISD::VAARG
, MVT::i32
, MVT::i64
);
444 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
445 } else if (Subtarget
.is32BitELFABI()) {
446 // VAARG is custom lowered with the 32-bit SVR4 ABI.
447 setOperationAction(ISD::VAARG
, MVT::Other
, Custom
);
448 setOperationAction(ISD::VAARG
, MVT::i64
, Custom
);
450 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
452 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
453 if (Subtarget
.is32BitELFABI())
454 setOperationAction(ISD::VACOPY
, MVT::Other
, Custom
);
456 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
458 // Use the default implementation.
459 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
460 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
461 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Custom
);
462 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
463 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i64
, Custom
);
464 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET
, MVT::i32
, Custom
);
465 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET
, MVT::i64
, Custom
);
466 setOperationAction(ISD::EH_DWARF_CFA
, MVT::i32
, Custom
);
467 setOperationAction(ISD::EH_DWARF_CFA
, MVT::i64
, Custom
);
469 // We want to custom lower some of our intrinsics.
470 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
472 // To handle counter-based loop conditions.
473 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::i1
, Custom
);
475 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i8
, Custom
);
476 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i16
, Custom
);
477 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i32
, Custom
);
478 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
480 // Comparisons that require checking two conditions.
481 if (Subtarget
.hasSPE()) {
482 setCondCodeAction(ISD::SETO
, MVT::f32
, Expand
);
483 setCondCodeAction(ISD::SETO
, MVT::f64
, Expand
);
484 setCondCodeAction(ISD::SETUO
, MVT::f32
, Expand
);
485 setCondCodeAction(ISD::SETUO
, MVT::f64
, Expand
);
487 setCondCodeAction(ISD::SETULT
, MVT::f32
, Expand
);
488 setCondCodeAction(ISD::SETULT
, MVT::f64
, Expand
);
489 setCondCodeAction(ISD::SETUGT
, MVT::f32
, Expand
);
490 setCondCodeAction(ISD::SETUGT
, MVT::f64
, Expand
);
491 setCondCodeAction(ISD::SETUEQ
, MVT::f32
, Expand
);
492 setCondCodeAction(ISD::SETUEQ
, MVT::f64
, Expand
);
493 setCondCodeAction(ISD::SETOGE
, MVT::f32
, Expand
);
494 setCondCodeAction(ISD::SETOGE
, MVT::f64
, Expand
);
495 setCondCodeAction(ISD::SETOLE
, MVT::f32
, Expand
);
496 setCondCodeAction(ISD::SETOLE
, MVT::f64
, Expand
);
497 setCondCodeAction(ISD::SETONE
, MVT::f32
, Expand
);
498 setCondCodeAction(ISD::SETONE
, MVT::f64
, Expand
);
500 if (Subtarget
.has64BitSupport()) {
501 // They also have instructions for converting between i64 and fp.
502 setOperationAction(ISD::FP_TO_SINT
, MVT::i64
, Custom
);
503 setOperationAction(ISD::FP_TO_UINT
, MVT::i64
, Expand
);
504 setOperationAction(ISD::SINT_TO_FP
, MVT::i64
, Custom
);
505 setOperationAction(ISD::UINT_TO_FP
, MVT::i64
, Expand
);
506 // This is just the low 32 bits of a (signed) fp->i64 conversion.
507 // We cannot do this with Promote because i64 is not a legal type.
508 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Custom
);
510 if (Subtarget
.hasLFIWAX() || Subtarget
.isPPC64())
511 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Custom
);
513 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
514 if (Subtarget
.hasSPE())
515 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Legal
);
517 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Expand
);
520 // With the instructions enabled under FPCVT, we can do everything.
521 if (Subtarget
.hasFPCVT()) {
522 if (Subtarget
.has64BitSupport()) {
523 setOperationAction(ISD::FP_TO_SINT
, MVT::i64
, Custom
);
524 setOperationAction(ISD::FP_TO_UINT
, MVT::i64
, Custom
);
525 setOperationAction(ISD::SINT_TO_FP
, MVT::i64
, Custom
);
526 setOperationAction(ISD::UINT_TO_FP
, MVT::i64
, Custom
);
529 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Custom
);
530 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Custom
);
531 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Custom
);
532 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Custom
);
535 if (Subtarget
.use64BitRegs()) {
536 // 64-bit PowerPC implementations can support i64 types directly
537 addRegisterClass(MVT::i64
, &PPC::G8RCRegClass
);
538 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
539 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
540 // 64-bit PowerPC wants to expand i128 shifts itself.
541 setOperationAction(ISD::SHL_PARTS
, MVT::i64
, Custom
);
542 setOperationAction(ISD::SRA_PARTS
, MVT::i64
, Custom
);
543 setOperationAction(ISD::SRL_PARTS
, MVT::i64
, Custom
);
545 // 32-bit PowerPC wants to expand i64 shifts itself.
546 setOperationAction(ISD::SHL_PARTS
, MVT::i32
, Custom
);
547 setOperationAction(ISD::SRA_PARTS
, MVT::i32
, Custom
);
548 setOperationAction(ISD::SRL_PARTS
, MVT::i32
, Custom
);
551 if (Subtarget
.hasAltivec()) {
552 // First set operation action for all vector types to expand. Then we
553 // will selectively turn on ones that can be effectively codegen'd.
554 for (MVT VT
: MVT::vector_valuetypes()) {
555 // add/sub are legal for all supported vector VT's.
556 setOperationAction(ISD::ADD
, VT
, Legal
);
557 setOperationAction(ISD::SUB
, VT
, Legal
);
559 // For v2i64, these are only valid with P8Vector. This is corrected after
561 if (VT
.getSizeInBits() <= 128 && VT
.getScalarSizeInBits() <= 64) {
562 setOperationAction(ISD::SMAX
, VT
, Legal
);
563 setOperationAction(ISD::SMIN
, VT
, Legal
);
564 setOperationAction(ISD::UMAX
, VT
, Legal
);
565 setOperationAction(ISD::UMIN
, VT
, Legal
);
568 setOperationAction(ISD::SMAX
, VT
, Expand
);
569 setOperationAction(ISD::SMIN
, VT
, Expand
);
570 setOperationAction(ISD::UMAX
, VT
, Expand
);
571 setOperationAction(ISD::UMIN
, VT
, Expand
);
574 if (Subtarget
.hasVSX()) {
575 setOperationAction(ISD::FMAXNUM
, VT
, Legal
);
576 setOperationAction(ISD::FMINNUM
, VT
, Legal
);
579 // Vector instructions introduced in P8
580 if (Subtarget
.hasP8Altivec() && (VT
.SimpleTy
!= MVT::v1i128
)) {
581 setOperationAction(ISD::CTPOP
, VT
, Legal
);
582 setOperationAction(ISD::CTLZ
, VT
, Legal
);
585 setOperationAction(ISD::CTPOP
, VT
, Expand
);
586 setOperationAction(ISD::CTLZ
, VT
, Expand
);
589 // Vector instructions introduced in P9
590 if (Subtarget
.hasP9Altivec() && (VT
.SimpleTy
!= MVT::v1i128
))
591 setOperationAction(ISD::CTTZ
, VT
, Legal
);
593 setOperationAction(ISD::CTTZ
, VT
, Expand
);
595 // We promote all shuffles to v16i8.
596 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Promote
);
597 AddPromotedToType (ISD::VECTOR_SHUFFLE
, VT
, MVT::v16i8
);
599 // We promote all non-typed operations to v4i32.
600 setOperationAction(ISD::AND
, VT
, Promote
);
601 AddPromotedToType (ISD::AND
, VT
, MVT::v4i32
);
602 setOperationAction(ISD::OR
, VT
, Promote
);
603 AddPromotedToType (ISD::OR
, VT
, MVT::v4i32
);
604 setOperationAction(ISD::XOR
, VT
, Promote
);
605 AddPromotedToType (ISD::XOR
, VT
, MVT::v4i32
);
606 setOperationAction(ISD::LOAD
, VT
, Promote
);
607 AddPromotedToType (ISD::LOAD
, VT
, MVT::v4i32
);
608 setOperationAction(ISD::SELECT
, VT
, Promote
);
609 AddPromotedToType (ISD::SELECT
, VT
, MVT::v4i32
);
610 setOperationAction(ISD::VSELECT
, VT
, Legal
);
611 setOperationAction(ISD::SELECT_CC
, VT
, Promote
);
612 AddPromotedToType (ISD::SELECT_CC
, VT
, MVT::v4i32
);
613 setOperationAction(ISD::STORE
, VT
, Promote
);
614 AddPromotedToType (ISD::STORE
, VT
, MVT::v4i32
);
616 // No other operations are legal.
617 setOperationAction(ISD::MUL
, VT
, Expand
);
618 setOperationAction(ISD::SDIV
, VT
, Expand
);
619 setOperationAction(ISD::SREM
, VT
, Expand
);
620 setOperationAction(ISD::UDIV
, VT
, Expand
);
621 setOperationAction(ISD::UREM
, VT
, Expand
);
622 setOperationAction(ISD::FDIV
, VT
, Expand
);
623 setOperationAction(ISD::FREM
, VT
, Expand
);
624 setOperationAction(ISD::FNEG
, VT
, Expand
);
625 setOperationAction(ISD::FSQRT
, VT
, Expand
);
626 setOperationAction(ISD::FLOG
, VT
, Expand
);
627 setOperationAction(ISD::FLOG10
, VT
, Expand
);
628 setOperationAction(ISD::FLOG2
, VT
, Expand
);
629 setOperationAction(ISD::FEXP
, VT
, Expand
);
630 setOperationAction(ISD::FEXP2
, VT
, Expand
);
631 setOperationAction(ISD::FSIN
, VT
, Expand
);
632 setOperationAction(ISD::FCOS
, VT
, Expand
);
633 setOperationAction(ISD::FABS
, VT
, Expand
);
634 setOperationAction(ISD::FFLOOR
, VT
, Expand
);
635 setOperationAction(ISD::FCEIL
, VT
, Expand
);
636 setOperationAction(ISD::FTRUNC
, VT
, Expand
);
637 setOperationAction(ISD::FRINT
, VT
, Expand
);
638 setOperationAction(ISD::FNEARBYINT
, VT
, Expand
);
639 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Expand
);
640 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Expand
);
641 setOperationAction(ISD::BUILD_VECTOR
, VT
, Expand
);
642 setOperationAction(ISD::MULHU
, VT
, Expand
);
643 setOperationAction(ISD::MULHS
, VT
, Expand
);
644 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
645 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
646 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
647 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
648 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Expand
);
649 setOperationAction(ISD::FPOW
, VT
, Expand
);
650 setOperationAction(ISD::BSWAP
, VT
, Expand
);
651 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Expand
);
652 setOperationAction(ISD::ROTL
, VT
, Expand
);
653 setOperationAction(ISD::ROTR
, VT
, Expand
);
655 for (MVT InnerVT
: MVT::vector_valuetypes()) {
656 setTruncStoreAction(VT
, InnerVT
, Expand
);
657 setLoadExtAction(ISD::SEXTLOAD
, VT
, InnerVT
, Expand
);
658 setLoadExtAction(ISD::ZEXTLOAD
, VT
, InnerVT
, Expand
);
659 setLoadExtAction(ISD::EXTLOAD
, VT
, InnerVT
, Expand
);
662 if (!Subtarget
.hasP8Vector()) {
663 setOperationAction(ISD::SMAX
, MVT::v2i64
, Expand
);
664 setOperationAction(ISD::SMIN
, MVT::v2i64
, Expand
);
665 setOperationAction(ISD::UMAX
, MVT::v2i64
, Expand
);
666 setOperationAction(ISD::UMIN
, MVT::v2i64
, Expand
);
669 for (auto VT
: {MVT::v2i64
, MVT::v4i32
, MVT::v8i16
, MVT::v16i8
})
670 setOperationAction(ISD::ABS
, VT
, Custom
);
672 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
673 // with merges, splats, etc.
674 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v16i8
, Custom
);
676 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
677 // are cheap, so handle them before they get expanded to scalar.
678 setOperationAction(ISD::TRUNCATE
, MVT::v8i8
, Custom
);
679 setOperationAction(ISD::TRUNCATE
, MVT::v4i8
, Custom
);
680 setOperationAction(ISD::TRUNCATE
, MVT::v2i8
, Custom
);
681 setOperationAction(ISD::TRUNCATE
, MVT::v4i16
, Custom
);
682 setOperationAction(ISD::TRUNCATE
, MVT::v2i16
, Custom
);
684 setOperationAction(ISD::AND
, MVT::v4i32
, Legal
);
685 setOperationAction(ISD::OR
, MVT::v4i32
, Legal
);
686 setOperationAction(ISD::XOR
, MVT::v4i32
, Legal
);
687 setOperationAction(ISD::LOAD
, MVT::v4i32
, Legal
);
688 setOperationAction(ISD::SELECT
, MVT::v4i32
,
689 Subtarget
.useCRBits() ? Legal
: Expand
);
690 setOperationAction(ISD::STORE
, MVT::v4i32
, Legal
);
691 setOperationAction(ISD::FP_TO_SINT
, MVT::v4i32
, Legal
);
692 setOperationAction(ISD::FP_TO_UINT
, MVT::v4i32
, Legal
);
693 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i32
, Legal
);
694 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i32
, Legal
);
695 setOperationAction(ISD::FFLOOR
, MVT::v4f32
, Legal
);
696 setOperationAction(ISD::FCEIL
, MVT::v4f32
, Legal
);
697 setOperationAction(ISD::FTRUNC
, MVT::v4f32
, Legal
);
698 setOperationAction(ISD::FNEARBYINT
, MVT::v4f32
, Legal
);
700 // Without hasP8Altivec set, v2i64 SMAX isn't available.
701 // But ABS custom lowering requires SMAX support.
702 if (!Subtarget
.hasP8Altivec())
703 setOperationAction(ISD::ABS
, MVT::v2i64
, Expand
);
705 addRegisterClass(MVT::v4f32
, &PPC::VRRCRegClass
);
706 addRegisterClass(MVT::v4i32
, &PPC::VRRCRegClass
);
707 addRegisterClass(MVT::v8i16
, &PPC::VRRCRegClass
);
708 addRegisterClass(MVT::v16i8
, &PPC::VRRCRegClass
);
710 setOperationAction(ISD::MUL
, MVT::v4f32
, Legal
);
711 setOperationAction(ISD::FMA
, MVT::v4f32
, Legal
);
713 if (TM
.Options
.UnsafeFPMath
|| Subtarget
.hasVSX()) {
714 setOperationAction(ISD::FDIV
, MVT::v4f32
, Legal
);
715 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Legal
);
718 if (Subtarget
.hasP8Altivec())
719 setOperationAction(ISD::MUL
, MVT::v4i32
, Legal
);
721 setOperationAction(ISD::MUL
, MVT::v4i32
, Custom
);
723 setOperationAction(ISD::MUL
, MVT::v8i16
, Custom
);
724 setOperationAction(ISD::MUL
, MVT::v16i8
, Custom
);
726 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Custom
);
727 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i32
, Custom
);
729 setOperationAction(ISD::BUILD_VECTOR
, MVT::v16i8
, Custom
);
730 setOperationAction(ISD::BUILD_VECTOR
, MVT::v8i16
, Custom
);
731 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4i32
, Custom
);
732 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f32
, Custom
);
734 // Altivec does not contain unordered floating-point compare instructions
735 setCondCodeAction(ISD::SETUO
, MVT::v4f32
, Expand
);
736 setCondCodeAction(ISD::SETUEQ
, MVT::v4f32
, Expand
);
737 setCondCodeAction(ISD::SETO
, MVT::v4f32
, Expand
);
738 setCondCodeAction(ISD::SETONE
, MVT::v4f32
, Expand
);
740 if (Subtarget
.hasVSX()) {
741 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v2f64
, Legal
);
742 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2f64
, Legal
);
743 if (Subtarget
.hasP8Vector()) {
744 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Legal
);
745 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f32
, Legal
);
747 if (Subtarget
.hasDirectMove() && isPPC64
) {
748 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v16i8
, Legal
);
749 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v8i16
, Legal
);
750 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i32
, Legal
);
751 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v2i64
, Legal
);
752 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v16i8
, Legal
);
753 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v8i16
, Legal
);
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4i32
, Legal
);
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2i64
, Legal
);
757 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2f64
, Legal
);
759 setOperationAction(ISD::FFLOOR
, MVT::v2f64
, Legal
);
760 setOperationAction(ISD::FCEIL
, MVT::v2f64
, Legal
);
761 setOperationAction(ISD::FTRUNC
, MVT::v2f64
, Legal
);
762 setOperationAction(ISD::FNEARBYINT
, MVT::v2f64
, Legal
);
763 setOperationAction(ISD::FROUND
, MVT::v2f64
, Legal
);
765 setOperationAction(ISD::FROUND
, MVT::v4f32
, Legal
);
767 setOperationAction(ISD::MUL
, MVT::v2f64
, Legal
);
768 setOperationAction(ISD::FMA
, MVT::v2f64
, Legal
);
770 setOperationAction(ISD::FDIV
, MVT::v2f64
, Legal
);
771 setOperationAction(ISD::FSQRT
, MVT::v2f64
, Legal
);
773 // Share the Altivec comparison restrictions.
774 setCondCodeAction(ISD::SETUO
, MVT::v2f64
, Expand
);
775 setCondCodeAction(ISD::SETUEQ
, MVT::v2f64
, Expand
);
776 setCondCodeAction(ISD::SETO
, MVT::v2f64
, Expand
);
777 setCondCodeAction(ISD::SETONE
, MVT::v2f64
, Expand
);
779 setOperationAction(ISD::LOAD
, MVT::v2f64
, Legal
);
780 setOperationAction(ISD::STORE
, MVT::v2f64
, Legal
);
782 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v2f64
, Legal
);
784 if (Subtarget
.hasP8Vector())
785 addRegisterClass(MVT::f32
, &PPC::VSSRCRegClass
);
787 addRegisterClass(MVT::f64
, &PPC::VSFRCRegClass
);
789 addRegisterClass(MVT::v4i32
, &PPC::VSRCRegClass
);
790 addRegisterClass(MVT::v4f32
, &PPC::VSRCRegClass
);
791 addRegisterClass(MVT::v2f64
, &PPC::VSRCRegClass
);
793 if (Subtarget
.hasP8Altivec()) {
794 setOperationAction(ISD::SHL
, MVT::v2i64
, Legal
);
795 setOperationAction(ISD::SRA
, MVT::v2i64
, Legal
);
796 setOperationAction(ISD::SRL
, MVT::v2i64
, Legal
);
798 // 128 bit shifts can be accomplished via 3 instructions for SHL and
799 // SRL, but not for SRA because of the instructions available:
800 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
802 setOperationAction(ISD::SHL
, MVT::v1i128
, Expand
);
803 setOperationAction(ISD::SRL
, MVT::v1i128
, Expand
);
804 setOperationAction(ISD::SRA
, MVT::v1i128
, Expand
);
806 setOperationAction(ISD::SETCC
, MVT::v2i64
, Legal
);
809 setOperationAction(ISD::SHL
, MVT::v2i64
, Expand
);
810 setOperationAction(ISD::SRA
, MVT::v2i64
, Expand
);
811 setOperationAction(ISD::SRL
, MVT::v2i64
, Expand
);
813 setOperationAction(ISD::SETCC
, MVT::v2i64
, Custom
);
815 // VSX v2i64 only supports non-arithmetic operations.
816 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
817 setOperationAction(ISD::SUB
, MVT::v2i64
, Expand
);
820 setOperationAction(ISD::LOAD
, MVT::v2i64
, Promote
);
821 AddPromotedToType (ISD::LOAD
, MVT::v2i64
, MVT::v2f64
);
822 setOperationAction(ISD::STORE
, MVT::v2i64
, Promote
);
823 AddPromotedToType (ISD::STORE
, MVT::v2i64
, MVT::v2f64
);
825 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v2i64
, Legal
);
827 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Legal
);
828 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Legal
);
829 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Legal
);
830 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Legal
);
832 // Custom handling for partial vectors of integers converted to
833 // floating point. We already have optimal handling for v2i32 through
834 // the DAG combine, so those aren't necessary.
835 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i8
, Custom
);
836 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i8
, Custom
);
837 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i16
, Custom
);
838 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i16
, Custom
);
839 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i8
, Custom
);
840 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i8
, Custom
);
841 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i16
, Custom
);
842 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i16
, Custom
);
844 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
845 setOperationAction(ISD::FNEG
, MVT::v2f64
, Legal
);
846 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
847 setOperationAction(ISD::FABS
, MVT::v2f64
, Legal
);
848 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f32
, Legal
);
849 setOperationAction(ISD::FCOPYSIGN
, MVT::v2f64
, Legal
);
851 if (Subtarget
.hasDirectMove())
852 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2i64
, Custom
);
853 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2f64
, Custom
);
855 addRegisterClass(MVT::v2i64
, &PPC::VSRCRegClass
);
858 if (Subtarget
.hasP8Altivec()) {
859 addRegisterClass(MVT::v2i64
, &PPC::VRRCRegClass
);
860 addRegisterClass(MVT::v1i128
, &PPC::VRRCRegClass
);
863 if (Subtarget
.hasP9Vector()) {
864 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4i32
, Custom
);
865 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f32
, Custom
);
867 // 128 bit shifts can be accomplished via 3 instructions for SHL and
868 // SRL, but not for SRA because of the instructions available:
869 // VS{RL} and VS{RL}O.
870 setOperationAction(ISD::SHL
, MVT::v1i128
, Legal
);
871 setOperationAction(ISD::SRL
, MVT::v1i128
, Legal
);
872 setOperationAction(ISD::SRA
, MVT::v1i128
, Expand
);
874 if (EnableQuadPrecision
) {
875 addRegisterClass(MVT::f128
, &PPC::VRRCRegClass
);
876 setOperationAction(ISD::FADD
, MVT::f128
, Legal
);
877 setOperationAction(ISD::FSUB
, MVT::f128
, Legal
);
878 setOperationAction(ISD::FDIV
, MVT::f128
, Legal
);
879 setOperationAction(ISD::FMUL
, MVT::f128
, Legal
);
880 setOperationAction(ISD::FP_EXTEND
, MVT::f128
, Legal
);
881 // No extending loads to f128 on PPC.
882 for (MVT FPT
: MVT::fp_valuetypes())
883 setLoadExtAction(ISD::EXTLOAD
, MVT::f128
, FPT
, Expand
);
884 setOperationAction(ISD::FMA
, MVT::f128
, Legal
);
885 setCondCodeAction(ISD::SETULT
, MVT::f128
, Expand
);
886 setCondCodeAction(ISD::SETUGT
, MVT::f128
, Expand
);
887 setCondCodeAction(ISD::SETUEQ
, MVT::f128
, Expand
);
888 setCondCodeAction(ISD::SETOGE
, MVT::f128
, Expand
);
889 setCondCodeAction(ISD::SETOLE
, MVT::f128
, Expand
);
890 setCondCodeAction(ISD::SETONE
, MVT::f128
, Expand
);
892 setOperationAction(ISD::FTRUNC
, MVT::f128
, Legal
);
893 setOperationAction(ISD::FRINT
, MVT::f128
, Legal
);
894 setOperationAction(ISD::FFLOOR
, MVT::f128
, Legal
);
895 setOperationAction(ISD::FCEIL
, MVT::f128
, Legal
);
896 setOperationAction(ISD::FNEARBYINT
, MVT::f128
, Legal
);
897 setOperationAction(ISD::FROUND
, MVT::f128
, Legal
);
899 setOperationAction(ISD::SELECT
, MVT::f128
, Expand
);
900 setOperationAction(ISD::FP_ROUND
, MVT::f64
, Legal
);
901 setOperationAction(ISD::FP_ROUND
, MVT::f32
, Legal
);
902 setTruncStoreAction(MVT::f128
, MVT::f64
, Expand
);
903 setTruncStoreAction(MVT::f128
, MVT::f32
, Expand
);
904 setOperationAction(ISD::BITCAST
, MVT::i128
, Custom
);
905 // No implementation for these ops for PowerPC.
906 setOperationAction(ISD::FSIN
, MVT::f128
, Expand
);
907 setOperationAction(ISD::FCOS
, MVT::f128
, Expand
);
908 setOperationAction(ISD::FPOW
, MVT::f128
, Expand
);
909 setOperationAction(ISD::FPOWI
, MVT::f128
, Expand
);
910 setOperationAction(ISD::FREM
, MVT::f128
, Expand
);
912 setOperationAction(ISD::FP_EXTEND
, MVT::v2f32
, Custom
);
916 if (Subtarget
.hasP9Altivec()) {
917 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v8i16
, Custom
);
918 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v16i8
, Custom
);
922 if (Subtarget
.hasQPX()) {
923 setOperationAction(ISD::FADD
, MVT::v4f64
, Legal
);
924 setOperationAction(ISD::FSUB
, MVT::v4f64
, Legal
);
925 setOperationAction(ISD::FMUL
, MVT::v4f64
, Legal
);
926 setOperationAction(ISD::FREM
, MVT::v4f64
, Expand
);
928 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f64
, Legal
);
929 setOperationAction(ISD::FGETSIGN
, MVT::v4f64
, Expand
);
931 setOperationAction(ISD::LOAD
, MVT::v4f64
, Custom
);
932 setOperationAction(ISD::STORE
, MVT::v4f64
, Custom
);
934 setTruncStoreAction(MVT::v4f64
, MVT::v4f32
, Custom
);
935 setLoadExtAction(ISD::EXTLOAD
, MVT::v4f64
, MVT::v4f32
, Custom
);
937 if (!Subtarget
.useCRBits())
938 setOperationAction(ISD::SELECT
, MVT::v4f64
, Expand
);
939 setOperationAction(ISD::VSELECT
, MVT::v4f64
, Legal
);
941 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f64
, Legal
);
942 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f64
, Expand
);
943 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4f64
, Expand
);
944 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4f64
, Expand
);
945 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4f64
, Custom
);
946 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f64
, Legal
);
947 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f64
, Custom
);
949 setOperationAction(ISD::FP_TO_SINT
, MVT::v4f64
, Legal
);
950 setOperationAction(ISD::FP_TO_UINT
, MVT::v4f64
, Expand
);
952 setOperationAction(ISD::FP_ROUND
, MVT::v4f32
, Legal
);
953 setOperationAction(ISD::FP_EXTEND
, MVT::v4f64
, Legal
);
955 setOperationAction(ISD::FNEG
, MVT::v4f64
, Legal
);
956 setOperationAction(ISD::FABS
, MVT::v4f64
, Legal
);
957 setOperationAction(ISD::FSIN
, MVT::v4f64
, Expand
);
958 setOperationAction(ISD::FCOS
, MVT::v4f64
, Expand
);
959 setOperationAction(ISD::FPOW
, MVT::v4f64
, Expand
);
960 setOperationAction(ISD::FLOG
, MVT::v4f64
, Expand
);
961 setOperationAction(ISD::FLOG2
, MVT::v4f64
, Expand
);
962 setOperationAction(ISD::FLOG10
, MVT::v4f64
, Expand
);
963 setOperationAction(ISD::FEXP
, MVT::v4f64
, Expand
);
964 setOperationAction(ISD::FEXP2
, MVT::v4f64
, Expand
);
966 setOperationAction(ISD::FMINNUM
, MVT::v4f64
, Legal
);
967 setOperationAction(ISD::FMAXNUM
, MVT::v4f64
, Legal
);
969 setIndexedLoadAction(ISD::PRE_INC
, MVT::v4f64
, Legal
);
970 setIndexedStoreAction(ISD::PRE_INC
, MVT::v4f64
, Legal
);
972 addRegisterClass(MVT::v4f64
, &PPC::QFRCRegClass
);
974 setOperationAction(ISD::FADD
, MVT::v4f32
, Legal
);
975 setOperationAction(ISD::FSUB
, MVT::v4f32
, Legal
);
976 setOperationAction(ISD::FMUL
, MVT::v4f32
, Legal
);
977 setOperationAction(ISD::FREM
, MVT::v4f32
, Expand
);
979 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f32
, Legal
);
980 setOperationAction(ISD::FGETSIGN
, MVT::v4f32
, Expand
);
982 setOperationAction(ISD::LOAD
, MVT::v4f32
, Custom
);
983 setOperationAction(ISD::STORE
, MVT::v4f32
, Custom
);
985 if (!Subtarget
.useCRBits())
986 setOperationAction(ISD::SELECT
, MVT::v4f32
, Expand
);
987 setOperationAction(ISD::VSELECT
, MVT::v4f32
, Legal
);
989 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f32
, Legal
);
990 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f32
, Expand
);
991 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4f32
, Expand
);
992 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4f32
, Expand
);
993 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4f32
, Custom
);
994 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Legal
);
995 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f32
, Custom
);
997 setOperationAction(ISD::FP_TO_SINT
, MVT::v4f32
, Legal
);
998 setOperationAction(ISD::FP_TO_UINT
, MVT::v4f32
, Expand
);
1000 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
1001 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
1002 setOperationAction(ISD::FSIN
, MVT::v4f32
, Expand
);
1003 setOperationAction(ISD::FCOS
, MVT::v4f32
, Expand
);
1004 setOperationAction(ISD::FPOW
, MVT::v4f32
, Expand
);
1005 setOperationAction(ISD::FLOG
, MVT::v4f32
, Expand
);
1006 setOperationAction(ISD::FLOG2
, MVT::v4f32
, Expand
);
1007 setOperationAction(ISD::FLOG10
, MVT::v4f32
, Expand
);
1008 setOperationAction(ISD::FEXP
, MVT::v4f32
, Expand
);
1009 setOperationAction(ISD::FEXP2
, MVT::v4f32
, Expand
);
1011 setOperationAction(ISD::FMINNUM
, MVT::v4f32
, Legal
);
1012 setOperationAction(ISD::FMAXNUM
, MVT::v4f32
, Legal
);
1014 setIndexedLoadAction(ISD::PRE_INC
, MVT::v4f32
, Legal
);
1015 setIndexedStoreAction(ISD::PRE_INC
, MVT::v4f32
, Legal
);
1017 addRegisterClass(MVT::v4f32
, &PPC::QSRCRegClass
);
1019 setOperationAction(ISD::AND
, MVT::v4i1
, Legal
);
1020 setOperationAction(ISD::OR
, MVT::v4i1
, Legal
);
1021 setOperationAction(ISD::XOR
, MVT::v4i1
, Legal
);
1023 if (!Subtarget
.useCRBits())
1024 setOperationAction(ISD::SELECT
, MVT::v4i1
, Expand
);
1025 setOperationAction(ISD::VSELECT
, MVT::v4i1
, Legal
);
1027 setOperationAction(ISD::LOAD
, MVT::v4i1
, Custom
);
1028 setOperationAction(ISD::STORE
, MVT::v4i1
, Custom
);
1030 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4i1
, Custom
);
1031 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4i1
, Expand
);
1032 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4i1
, Expand
);
1033 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4i1
, Expand
);
1034 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4i1
, Custom
);
1035 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i1
, Expand
);
1036 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4i1
, Custom
);
1038 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i1
, Custom
);
1039 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i1
, Custom
);
1041 addRegisterClass(MVT::v4i1
, &PPC::QBRCRegClass
);
1043 setOperationAction(ISD::FFLOOR
, MVT::v4f64
, Legal
);
1044 setOperationAction(ISD::FCEIL
, MVT::v4f64
, Legal
);
1045 setOperationAction(ISD::FTRUNC
, MVT::v4f64
, Legal
);
1046 setOperationAction(ISD::FROUND
, MVT::v4f64
, Legal
);
1048 setOperationAction(ISD::FFLOOR
, MVT::v4f32
, Legal
);
1049 setOperationAction(ISD::FCEIL
, MVT::v4f32
, Legal
);
1050 setOperationAction(ISD::FTRUNC
, MVT::v4f32
, Legal
);
1051 setOperationAction(ISD::FROUND
, MVT::v4f32
, Legal
);
1053 setOperationAction(ISD::FNEARBYINT
, MVT::v4f64
, Expand
);
1054 setOperationAction(ISD::FNEARBYINT
, MVT::v4f32
, Expand
);
1056 // These need to set FE_INEXACT, and so cannot be vectorized here.
1057 setOperationAction(ISD::FRINT
, MVT::v4f64
, Expand
);
1058 setOperationAction(ISD::FRINT
, MVT::v4f32
, Expand
);
1060 if (TM
.Options
.UnsafeFPMath
) {
1061 setOperationAction(ISD::FDIV
, MVT::v4f64
, Legal
);
1062 setOperationAction(ISD::FSQRT
, MVT::v4f64
, Legal
);
1064 setOperationAction(ISD::FDIV
, MVT::v4f32
, Legal
);
1065 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Legal
);
1067 setOperationAction(ISD::FDIV
, MVT::v4f64
, Expand
);
1068 setOperationAction(ISD::FSQRT
, MVT::v4f64
, Expand
);
1070 setOperationAction(ISD::FDIV
, MVT::v4f32
, Expand
);
1071 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Expand
);
1075 if (Subtarget
.has64BitSupport())
1076 setOperationAction(ISD::PREFETCH
, MVT::Other
, Legal
);
1078 setOperationAction(ISD::READCYCLECOUNTER
, MVT::i64
, isPPC64
? Legal
: Custom
);
1081 setOperationAction(ISD::ATOMIC_LOAD
, MVT::i64
, Expand
);
1082 setOperationAction(ISD::ATOMIC_STORE
, MVT::i64
, Expand
);
1085 setBooleanContents(ZeroOrOneBooleanContent
);
1087 if (Subtarget
.hasAltivec()) {
1088 // Altivec instructions set fields to all zeros or all ones.
1089 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent
);
1093 // These libcalls are not available in 32-bit.
1094 setLibcallName(RTLIB::SHL_I128
, nullptr);
1095 setLibcallName(RTLIB::SRL_I128
, nullptr);
1096 setLibcallName(RTLIB::SRA_I128
, nullptr);
1099 setStackPointerRegisterToSaveRestore(isPPC64
? PPC::X1
: PPC::R1
);
1101 // We have target-specific dag combine patterns for the following nodes:
1102 setTargetDAGCombine(ISD::ADD
);
1103 setTargetDAGCombine(ISD::SHL
);
1104 setTargetDAGCombine(ISD::SRA
);
1105 setTargetDAGCombine(ISD::SRL
);
1106 setTargetDAGCombine(ISD::MUL
);
1107 setTargetDAGCombine(ISD::SINT_TO_FP
);
1108 setTargetDAGCombine(ISD::BUILD_VECTOR
);
1109 if (Subtarget
.hasFPCVT())
1110 setTargetDAGCombine(ISD::UINT_TO_FP
);
1111 setTargetDAGCombine(ISD::LOAD
);
1112 setTargetDAGCombine(ISD::STORE
);
1113 setTargetDAGCombine(ISD::BR_CC
);
1114 if (Subtarget
.useCRBits())
1115 setTargetDAGCombine(ISD::BRCOND
);
1116 setTargetDAGCombine(ISD::BSWAP
);
1117 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN
);
1118 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN
);
1119 setTargetDAGCombine(ISD::INTRINSIC_VOID
);
1121 setTargetDAGCombine(ISD::SIGN_EXTEND
);
1122 setTargetDAGCombine(ISD::ZERO_EXTEND
);
1123 setTargetDAGCombine(ISD::ANY_EXTEND
);
1125 setTargetDAGCombine(ISD::TRUNCATE
);
1126 setTargetDAGCombine(ISD::VECTOR_SHUFFLE
);
1129 if (Subtarget
.useCRBits()) {
1130 setTargetDAGCombine(ISD::TRUNCATE
);
1131 setTargetDAGCombine(ISD::SETCC
);
1132 setTargetDAGCombine(ISD::SELECT_CC
);
1135 // Use reciprocal estimates.
1136 if (TM
.Options
.UnsafeFPMath
) {
1137 setTargetDAGCombine(ISD::FDIV
);
1138 setTargetDAGCombine(ISD::FSQRT
);
1141 if (Subtarget
.hasP9Altivec()) {
1142 setTargetDAGCombine(ISD::ABS
);
1143 setTargetDAGCombine(ISD::VSELECT
);
1146 // Darwin long double math library functions have $LDBL128 appended.
1147 if (Subtarget
.isDarwin()) {
1148 setLibcallName(RTLIB::COS_PPCF128
, "cosl$LDBL128");
1149 setLibcallName(RTLIB::POW_PPCF128
, "powl$LDBL128");
1150 setLibcallName(RTLIB::REM_PPCF128
, "fmodl$LDBL128");
1151 setLibcallName(RTLIB::SIN_PPCF128
, "sinl$LDBL128");
1152 setLibcallName(RTLIB::SQRT_PPCF128
, "sqrtl$LDBL128");
1153 setLibcallName(RTLIB::LOG_PPCF128
, "logl$LDBL128");
1154 setLibcallName(RTLIB::LOG2_PPCF128
, "log2l$LDBL128");
1155 setLibcallName(RTLIB::LOG10_PPCF128
, "log10l$LDBL128");
1156 setLibcallName(RTLIB::EXP_PPCF128
, "expl$LDBL128");
1157 setLibcallName(RTLIB::EXP2_PPCF128
, "exp2l$LDBL128");
1160 if (EnableQuadPrecision
) {
1161 setLibcallName(RTLIB::LOG_F128
, "logf128");
1162 setLibcallName(RTLIB::LOG2_F128
, "log2f128");
1163 setLibcallName(RTLIB::LOG10_F128
, "log10f128");
1164 setLibcallName(RTLIB::EXP_F128
, "expf128");
1165 setLibcallName(RTLIB::EXP2_F128
, "exp2f128");
1166 setLibcallName(RTLIB::SIN_F128
, "sinf128");
1167 setLibcallName(RTLIB::COS_F128
, "cosf128");
1168 setLibcallName(RTLIB::POW_F128
, "powf128");
1169 setLibcallName(RTLIB::FMIN_F128
, "fminf128");
1170 setLibcallName(RTLIB::FMAX_F128
, "fmaxf128");
1171 setLibcallName(RTLIB::POWI_F128
, "__powikf2");
1172 setLibcallName(RTLIB::REM_F128
, "fmodf128");
1175 // With 32 condition bits, we don't need to sink (and duplicate) compares
1176 // aggressively in CodeGenPrep.
1177 if (Subtarget
.useCRBits()) {
1178 setHasMultipleConditionRegisters();
1179 setJumpIsExpensive();
1182 setMinFunctionAlignment(llvm::Align(4));
1183 if (Subtarget
.isDarwin())
1184 setPrefFunctionAlignment(llvm::Align(16));
1186 switch (Subtarget
.getDarwinDirective()) {
1191 case PPC::DIR_E500mc
:
1192 case PPC::DIR_E5500
:
1195 case PPC::DIR_PWR5X
:
1197 case PPC::DIR_PWR6X
:
1201 setPrefLoopAlignment(llvm::Align(16));
1202 setPrefFunctionAlignment(llvm::Align(16));
1206 if (Subtarget
.enableMachineScheduler())
1207 setSchedulingPreference(Sched::Source
);
1209 setSchedulingPreference(Sched::Hybrid
);
1211 computeRegisterProperties(STI
.getRegisterInfo());
1213 // The Freescale cores do better with aggressive inlining of memcpy and
1214 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1215 if (Subtarget
.getDarwinDirective() == PPC::DIR_E500mc
||
1216 Subtarget
.getDarwinDirective() == PPC::DIR_E5500
) {
1217 MaxStoresPerMemset
= 32;
1218 MaxStoresPerMemsetOptSize
= 16;
1219 MaxStoresPerMemcpy
= 32;
1220 MaxStoresPerMemcpyOptSize
= 8;
1221 MaxStoresPerMemmove
= 32;
1222 MaxStoresPerMemmoveOptSize
= 8;
1223 } else if (Subtarget
.getDarwinDirective() == PPC::DIR_A2
) {
1224 // The A2 also benefits from (very) aggressive inlining of memcpy and
1225 // friends. The overhead of the function call, even when warm, can be
1226 // over one hundred cycles.
1227 MaxStoresPerMemset
= 128;
1228 MaxStoresPerMemcpy
= 128;
1229 MaxStoresPerMemmove
= 128;
1230 MaxLoadsPerMemcmp
= 128;
1232 MaxLoadsPerMemcmp
= 8;
1233 MaxLoadsPerMemcmpOptSize
= 4;
1237 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1238 /// the desired ByVal argument alignment.
// Recurses through Ty: vector types are classified by their bit width, while
// array and struct types are handled by visiting their element types.
// MaxAlign is an in/out running result passed by reference; MaxMaxAlign is the
// value it is compared against (presumably an upper bound that lets the search
// stop early).
// NOTE(review): this extract omits the statements guarded by several of the
// conditions below (original lines 1242, 1245, 1247 and 1260-1263), so the
// exact actions taken there cannot be confirmed from this view.
1239 static void getMaxByValAlign(Type
*Ty
, unsigned &MaxAlign
,
1240 unsigned MaxMaxAlign
) {
// Check whether the running alignment already equals MaxMaxAlign.
1241 if (MaxAlign
== MaxMaxAlign
)
// Vector types: vectors of at least 256 bits are singled out only when
// MaxMaxAlign is at least 32; otherwise vectors of at least 128 bits are
// considered while MaxAlign is still below 16.
1243 if (VectorType
*VTy
= dyn_cast
<VectorType
>(Ty
)) {
1244 if (MaxMaxAlign
>= 32 && VTy
->getBitWidth() >= 256)
1246 else if (VTy
->getBitWidth() >= 128 && MaxAlign
< 16)
// Array types: compute the element type's alignment from scratch (EltAlign
// starts at 0) and keep the larger of it and MaxAlign.
1248 } else if (ArrayType
*ATy
= dyn_cast
<ArrayType
>(Ty
)) {
1249 unsigned EltAlign
= 0;
1250 getMaxByValAlign(ATy
->getElementType(), EltAlign
, MaxMaxAlign
);
1251 if (EltAlign
> MaxAlign
)
1252 MaxAlign
= EltAlign
;
// Struct types: do the same for every member type, re-checking MaxAlign
// against MaxMaxAlign after each member.
1253 } else if (StructType
*STy
= dyn_cast
<StructType
>(Ty
)) {
1254 for (auto *EltTy
: STy
->elements()) {
1255 unsigned EltAlign
= 0;
1256 getMaxByValAlign(EltTy
, EltAlign
, MaxMaxAlign
);
1257 if (EltAlign
> MaxAlign
)
1258 MaxAlign
= EltAlign
;
1259 if (MaxAlign
== MaxMaxAlign
)
1265 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1266 /// function arguments in the caller parameter area.
// Ty is the by-value argument type. DL is not referenced in the visible part
// of the body.
// NOTE(review): the return statements (original lines 1271 and 1278) are not
// visible in this extract; confirm the returned values against the full file.
1267 unsigned PPCTargetLowering::getByValTypeAlignment(Type
*Ty
,
1268 const DataLayout
&DL
) const {
1269 // Darwin passes everything on 4 byte boundary.
1270 if (Subtarget
.isDarwin())
1273 // 16-byte and wider vectors are passed on a 16-byte boundary.
1274 // Everything else is aligned to 8 bytes on PPC64 and 4 bytes on PPC32.
1275 unsigned Align
= Subtarget
.isPPC64() ? 8 : 4;
// Only subtargets with Altivec or QPX inspect the type for vectors; the
// limit handed to the helper is 32 with QPX and 16 otherwise.
1276 if (Subtarget
.hasAltivec() || Subtarget
.hasQPX())
1277 getMaxByValAlign(Ty
, Align
, Subtarget
.hasQPX() ? 32 : 16);
1281 bool PPCTargetLowering::useSoftFloat() const {
1282 return Subtarget
.useSoftFloat();
1285 bool PPCTargetLowering::hasSPE() const {
1286 return Subtarget
.hasSPE();
1289 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT
) const {
1290 return VT
.isScalarInteger();
1293 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode
) const {
1294 switch ((PPCISD::NodeType
)Opcode
) {
1295 case PPCISD::FIRST_NUMBER
: break;
1296 case PPCISD::FSEL
: return "PPCISD::FSEL";
1297 case PPCISD::FCFID
: return "PPCISD::FCFID";
1298 case PPCISD::FCFIDU
: return "PPCISD::FCFIDU";
1299 case PPCISD::FCFIDS
: return "PPCISD::FCFIDS";
1300 case PPCISD::FCFIDUS
: return "PPCISD::FCFIDUS";
1301 case PPCISD::FCTIDZ
: return "PPCISD::FCTIDZ";
1302 case PPCISD::FCTIWZ
: return "PPCISD::FCTIWZ";
1303 case PPCISD::FCTIDUZ
: return "PPCISD::FCTIDUZ";
1304 case PPCISD::FCTIWUZ
: return "PPCISD::FCTIWUZ";
1305 case PPCISD::FP_TO_UINT_IN_VSR
:
1306 return "PPCISD::FP_TO_UINT_IN_VSR,";
1307 case PPCISD::FP_TO_SINT_IN_VSR
:
1308 return "PPCISD::FP_TO_SINT_IN_VSR";
1309 case PPCISD::FRE
: return "PPCISD::FRE";
1310 case PPCISD::FRSQRTE
: return "PPCISD::FRSQRTE";
1311 case PPCISD::STFIWX
: return "PPCISD::STFIWX";
1312 case PPCISD::VMADDFP
: return "PPCISD::VMADDFP";
1313 case PPCISD::VNMSUBFP
: return "PPCISD::VNMSUBFP";
1314 case PPCISD::VPERM
: return "PPCISD::VPERM";
1315 case PPCISD::XXSPLT
: return "PPCISD::XXSPLT";
1316 case PPCISD::VECINSERT
: return "PPCISD::VECINSERT";
1317 case PPCISD::XXREVERSE
: return "PPCISD::XXREVERSE";
1318 case PPCISD::XXPERMDI
: return "PPCISD::XXPERMDI";
1319 case PPCISD::VECSHL
: return "PPCISD::VECSHL";
1320 case PPCISD::CMPB
: return "PPCISD::CMPB";
1321 case PPCISD::Hi
: return "PPCISD::Hi";
1322 case PPCISD::Lo
: return "PPCISD::Lo";
1323 case PPCISD::TOC_ENTRY
: return "PPCISD::TOC_ENTRY";
1324 case PPCISD::ATOMIC_CMP_SWAP_8
: return "PPCISD::ATOMIC_CMP_SWAP_8";
1325 case PPCISD::ATOMIC_CMP_SWAP_16
: return "PPCISD::ATOMIC_CMP_SWAP_16";
1326 case PPCISD::DYNALLOC
: return "PPCISD::DYNALLOC";
1327 case PPCISD::DYNAREAOFFSET
: return "PPCISD::DYNAREAOFFSET";
1328 case PPCISD::GlobalBaseReg
: return "PPCISD::GlobalBaseReg";
1329 case PPCISD::SRL
: return "PPCISD::SRL";
1330 case PPCISD::SRA
: return "PPCISD::SRA";
1331 case PPCISD::SHL
: return "PPCISD::SHL";
1332 case PPCISD::SRA_ADDZE
: return "PPCISD::SRA_ADDZE";
1333 case PPCISD::CALL
: return "PPCISD::CALL";
1334 case PPCISD::CALL_NOP
: return "PPCISD::CALL_NOP";
1335 case PPCISD::MTCTR
: return "PPCISD::MTCTR";
1336 case PPCISD::BCTRL
: return "PPCISD::BCTRL";
1337 case PPCISD::BCTRL_LOAD_TOC
: return "PPCISD::BCTRL_LOAD_TOC";
1338 case PPCISD::RET_FLAG
: return "PPCISD::RET_FLAG";
1339 case PPCISD::READ_TIME_BASE
: return "PPCISD::READ_TIME_BASE";
1340 case PPCISD::EH_SJLJ_SETJMP
: return "PPCISD::EH_SJLJ_SETJMP";
1341 case PPCISD::EH_SJLJ_LONGJMP
: return "PPCISD::EH_SJLJ_LONGJMP";
1342 case PPCISD::MFOCRF
: return "PPCISD::MFOCRF";
1343 case PPCISD::MFVSR
: return "PPCISD::MFVSR";
1344 case PPCISD::MTVSRA
: return "PPCISD::MTVSRA";
1345 case PPCISD::MTVSRZ
: return "PPCISD::MTVSRZ";
1346 case PPCISD::SINT_VEC_TO_FP
: return "PPCISD::SINT_VEC_TO_FP";
1347 case PPCISD::UINT_VEC_TO_FP
: return "PPCISD::UINT_VEC_TO_FP";
1348 case PPCISD::ANDIo_1_EQ_BIT
: return "PPCISD::ANDIo_1_EQ_BIT";
1349 case PPCISD::ANDIo_1_GT_BIT
: return "PPCISD::ANDIo_1_GT_BIT";
1350 case PPCISD::VCMP
: return "PPCISD::VCMP";
1351 case PPCISD::VCMPo
: return "PPCISD::VCMPo";
1352 case PPCISD::LBRX
: return "PPCISD::LBRX";
1353 case PPCISD::STBRX
: return "PPCISD::STBRX";
1354 case PPCISD::LFIWAX
: return "PPCISD::LFIWAX";
1355 case PPCISD::LFIWZX
: return "PPCISD::LFIWZX";
1356 case PPCISD::LXSIZX
: return "PPCISD::LXSIZX";
1357 case PPCISD::STXSIX
: return "PPCISD::STXSIX";
1358 case PPCISD::VEXTS
: return "PPCISD::VEXTS";
1359 case PPCISD::SExtVElems
: return "PPCISD::SExtVElems";
1360 case PPCISD::LXVD2X
: return "PPCISD::LXVD2X";
1361 case PPCISD::STXVD2X
: return "PPCISD::STXVD2X";
1362 case PPCISD::LOAD_VEC_BE
: return "PPCISD::LOAD_VEC_BE";
1363 case PPCISD::STORE_VEC_BE
: return "PPCISD::STORE_VEC_BE";
1364 case PPCISD::ST_VSR_SCAL_INT
:
1365 return "PPCISD::ST_VSR_SCAL_INT";
1366 case PPCISD::COND_BRANCH
: return "PPCISD::COND_BRANCH";
1367 case PPCISD::BDNZ
: return "PPCISD::BDNZ";
1368 case PPCISD::BDZ
: return "PPCISD::BDZ";
1369 case PPCISD::MFFS
: return "PPCISD::MFFS";
1370 case PPCISD::FADDRTZ
: return "PPCISD::FADDRTZ";
1371 case PPCISD::TC_RETURN
: return "PPCISD::TC_RETURN";
1372 case PPCISD::CR6SET
: return "PPCISD::CR6SET";
1373 case PPCISD::CR6UNSET
: return "PPCISD::CR6UNSET";
1374 case PPCISD::PPC32_GOT
: return "PPCISD::PPC32_GOT";
1375 case PPCISD::PPC32_PICGOT
: return "PPCISD::PPC32_PICGOT";
1376 case PPCISD::ADDIS_GOT_TPREL_HA
: return "PPCISD::ADDIS_GOT_TPREL_HA";
1377 case PPCISD::LD_GOT_TPREL_L
: return "PPCISD::LD_GOT_TPREL_L";
1378 case PPCISD::ADD_TLS
: return "PPCISD::ADD_TLS";
1379 case PPCISD::ADDIS_TLSGD_HA
: return "PPCISD::ADDIS_TLSGD_HA";
1380 case PPCISD::ADDI_TLSGD_L
: return "PPCISD::ADDI_TLSGD_L";
1381 case PPCISD::GET_TLS_ADDR
: return "PPCISD::GET_TLS_ADDR";
1382 case PPCISD::ADDI_TLSGD_L_ADDR
: return "PPCISD::ADDI_TLSGD_L_ADDR";
1383 case PPCISD::ADDIS_TLSLD_HA
: return "PPCISD::ADDIS_TLSLD_HA";
1384 case PPCISD::ADDI_TLSLD_L
: return "PPCISD::ADDI_TLSLD_L";
1385 case PPCISD::GET_TLSLD_ADDR
: return "PPCISD::GET_TLSLD_ADDR";
1386 case PPCISD::ADDI_TLSLD_L_ADDR
: return "PPCISD::ADDI_TLSLD_L_ADDR";
1387 case PPCISD::ADDIS_DTPREL_HA
: return "PPCISD::ADDIS_DTPREL_HA";
1388 case PPCISD::ADDI_DTPREL_L
: return "PPCISD::ADDI_DTPREL_L";
1389 case PPCISD::VADD_SPLAT
: return "PPCISD::VADD_SPLAT";
1390 case PPCISD::SC
: return "PPCISD::SC";
1391 case PPCISD::CLRBHRB
: return "PPCISD::CLRBHRB";
1392 case PPCISD::MFBHRBE
: return "PPCISD::MFBHRBE";
1393 case PPCISD::RFEBB
: return "PPCISD::RFEBB";
1394 case PPCISD::XXSWAPD
: return "PPCISD::XXSWAPD";
1395 case PPCISD::SWAP_NO_CHAIN
: return "PPCISD::SWAP_NO_CHAIN";
1396 case PPCISD::VABSD
: return "PPCISD::VABSD";
1397 case PPCISD::QVFPERM
: return "PPCISD::QVFPERM";
1398 case PPCISD::QVGPCI
: return "PPCISD::QVGPCI";
1399 case PPCISD::QVALIGNI
: return "PPCISD::QVALIGNI";
1400 case PPCISD::QVESPLATI
: return "PPCISD::QVESPLATI";
1401 case PPCISD::QBFLT
: return "PPCISD::QBFLT";
1402 case PPCISD::QVLFSb
: return "PPCISD::QVLFSb";
1403 case PPCISD::BUILD_FP128
: return "PPCISD::BUILD_FP128";
1404 case PPCISD::BUILD_SPE64
: return "PPCISD::BUILD_SPE64";
1405 case PPCISD::EXTRACT_SPE
: return "PPCISD::EXTRACT_SPE";
1406 case PPCISD::EXTSWSLI
: return "PPCISD::EXTSWSLI";
1407 case PPCISD::LD_VSX_LH
: return "PPCISD::LD_VSX_LH";
1408 case PPCISD::FP_EXTEND_LH
: return "PPCISD::FP_EXTEND_LH";
// Chooses the value type produced by a SETCC whose operands have type VT.
// NOTE(review): the end of the signature and the guard that selects the first
// return (original lines 1414-1415) are not visible in this extract; the first
// return presumably covers non-vector VT -- confirm against the full file.
1413 EVT
PPCTargetLowering::getSetCCResultType(const DataLayout
&DL
, LLVMContext
&C
,
// i1 when the subtarget uses CR bits, i32 otherwise.
1416 return Subtarget
.useCRBits() ? MVT::i1
: MVT::i32
;
// With QPX the result is a vector of i1 with as many elements as VT.
1418 if (Subtarget
.hasQPX())
1419 return EVT::getVectorVT(C
, MVT::i1
, VT
.getVectorNumElements());
// Otherwise keep VT's shape and switch its element type to integer.
1421 return VT
.changeVectorElementTypeToInteger();
// Hook queried with the value type of a candidate FMA; VT must be a
// floating-point type (asserted below).
// NOTE(review): the return statement (original line 1426) is not visible in
// this extract, so the answer this hook gives cannot be confirmed from here.
1424 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT
) const {
1425 assert(VT
.isFloatingPoint() && "Non-floating-point FMA?");
1429 //===----------------------------------------------------------------------===//
1430 // Node matching predicates, for use by the tblgen matching code.
1431 //===----------------------------------------------------------------------===//
1433 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
// Two forms are examined: a ConstantFPSDNode, and an extending or
// non-extending load whose operand 1 is a constant-pool entry holding a
// ConstantFP. In both cases the answer is the constant's APFloat isZero().
// NOTE(review): the fall-through result for every other node (original lines
// 1442-1444) is not visible in this extract.
1434 static bool isFloatingPointZero(SDValue Op
) {
1435 if (ConstantFPSDNode
*CFP
= dyn_cast
<ConstantFPSDNode
>(Op
))
1436 return CFP
->getValueAPF().isZero();
1437 else if (ISD::isEXTLoad(Op
.getNode()) || ISD::isNON_EXTLoad(Op
.getNode())) {
1438 // Maybe this has already been legalized into the constant pool?
1439 if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(Op
.getOperand(1)))
1440 if (const ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(CP
->getConstVal()))
1441 return CFP
->getValueAPF().isZero();
/// isConstantOrUndef - Test one shuffle-mask element against an expected
/// index. A negative \p Op is treated as undef and always matches; any other
/// \p Op matches only when it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1452 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1453 /// VPKUHUM instruction.
1454 /// The ShuffleKind distinguishes between big-endian operations with
1455 /// two different inputs (0), either-endian operations with two identical
1456 /// inputs (1), and little-endian operations with two different inputs (2).
1457 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
// Each mask element is accepted when it is undef or equals the index expected
// for its position (see isConstantOrUndef).
// NOTE(review): the return statements and any endianness guards at the top of
// the ShuffleKind 0 and 2 branches (original lines 1462-1463, 1466, 1468-1469,
// 1472, 1478-1481) are not visible in this extract.
1458 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1459 SelectionDAG
&DAG
) {
1460 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
// Kind 0: all 16 elements must select the odd indices 1, 3, ..., 31.
1461 if (ShuffleKind
== 0) {
1464 for (unsigned i
= 0; i
!= 16; ++i
)
1465 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+1))
// Kind 2: all 16 elements must select the even indices 0, 2, ..., 30.
1467 } else if (ShuffleKind
== 2) {
1470 for (unsigned i
= 0; i
!= 16; ++i
)
1471 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2))
// Kind 1: both 8-element halves must carry the same pattern 2*i+j, where j is
// 0 on little-endian and 1 on big-endian.
1473 } else if (ShuffleKind
== 1) {
1474 unsigned j
= IsLE
? 0 : 1;
1475 for (unsigned i
= 0; i
!= 8; ++i
)
1476 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1477 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
))
1483 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1484 /// VPKUWUM instruction.
1485 /// The ShuffleKind distinguishes between big-endian operations with
1486 /// two different inputs (0), either-endian operations with two identical
1487 /// inputs (1), and little-endian operations with two different inputs (2).
1488 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
// Mask elements are checked in pairs; each element is accepted when it is
// undef or equals the index expected for its position (see
// isConstantOrUndef).
// NOTE(review): the return statements and any endianness guards at the top of
// the ShuffleKind 0 and 2 branches (original lines 1493-1494, 1498, 1500-1501,
// 1505, 1513-1516) are not visible in this extract.
1489 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1490 SelectionDAG
&DAG
) {
1491 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
// Kind 0: pair (i, i+1) must select indices (2*i+2, 2*i+3).
1492 if (ShuffleKind
== 0) {
1495 for (unsigned i
= 0; i
!= 16; i
+= 2)
1496 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+2) ||
1497 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+3))
// Kind 2: pair (i, i+1) must select indices (2*i, 2*i+1).
1499 } else if (ShuffleKind
== 2) {
1502 for (unsigned i
= 0; i
!= 16; i
+= 2)
1503 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2) ||
1504 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+1))
// Kind 1: both 8-element halves must carry the same pairs (2*i+j, 2*i+j+1),
// where j is 0 on little-endian and 2 on big-endian.
1506 } else if (ShuffleKind
== 1) {
1507 unsigned j
= IsLE
? 0 : 2;
1508 for (unsigned i
= 0; i
!= 8; i
+= 2)
1509 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1510 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+j
+1) ||
1511 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
) ||
1512 !isConstantOrUndef(N
->getMaskElt(i
+9), i
*2+j
+1))
1518 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1519 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1520 /// current subtarget.
1522 /// The ShuffleKind distinguishes between big-endian operations with
1523 /// two different inputs (0), either-endian operations with two identical
1524 /// inputs (1), and little-endian operations with two different inputs (2).
1525 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1526 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1527 SelectionDAG
&DAG
) {
1528 const PPCSubtarget
& Subtarget
=
1529 static_cast<const PPCSubtarget
&>(DAG
.getSubtarget());
1530 if (!Subtarget
.hasP8Vector())
1533 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
1534 if (ShuffleKind
== 0) {
1537 for (unsigned i
= 0; i
!= 16; i
+= 4)
1538 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+4) ||
1539 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+5) ||
1540 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+6) ||
1541 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+7))
1543 } else if (ShuffleKind
== 2) {
1546 for (unsigned i
= 0; i
!= 16; i
+= 4)
1547 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2) ||
1548 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+1) ||
1549 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+2) ||
1550 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+3))
1552 } else if (ShuffleKind
== 1) {
1553 unsigned j
= IsLE
? 0 : 4;
1554 for (unsigned i
= 0; i
!= 8; i
+= 4)
1555 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1556 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+j
+1) ||
1557 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+j
+2) ||
1558 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+j
+3) ||
1559 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
) ||
1560 !isConstantOrUndef(N
->getMaskElt(i
+9), i
*2+j
+1) ||
1561 !isConstantOrUndef(N
->getMaskElt(i
+10), i
*2+j
+2) ||
1562 !isConstantOrUndef(N
->getMaskElt(i
+11), i
*2+j
+3))
1568 /// isVMerge - Common function, used to match vmrg* shuffles.
1570 static bool isVMerge(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1571 unsigned LHSStart
, unsigned RHSStart
) {
1572 if (N
->getValueType(0) != MVT::v16i8
)
1574 assert((UnitSize
== 1 || UnitSize
== 2 || UnitSize
== 4) &&
1575 "Unsupported merge size!");
1577 for (unsigned i
= 0; i
!= 8/UnitSize
; ++i
) // Step over units
1578 for (unsigned j
= 0; j
!= UnitSize
; ++j
) { // Step over bytes within unit
1579 if (!isConstantOrUndef(N
->getMaskElt(i
*UnitSize
*2+j
),
1580 LHSStart
+j
+i
*UnitSize
) ||
1581 !isConstantOrUndef(N
->getMaskElt(i
*UnitSize
*2+UnitSize
+j
),
1582 RHSStart
+j
+i
*UnitSize
))
1588 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1589 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1590 /// The ShuffleKind distinguishes between big-endian merges with two
1591 /// different inputs (0), either-endian merges with two identical inputs (1),
1592 /// and little-endian merges with two different inputs (2). For the latter,
1593 /// the input operands are swapped (see PPCInstrAltivec.td).
1594 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1595 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1596 if (DAG
.getDataLayout().isLittleEndian()) {
1597 if (ShuffleKind
== 1) // unary
1598 return isVMerge(N
, UnitSize
, 0, 0);
1599 else if (ShuffleKind
== 2) // swapped
1600 return isVMerge(N
, UnitSize
, 0, 16);
1604 if (ShuffleKind
== 1) // unary
1605 return isVMerge(N
, UnitSize
, 8, 8);
1606 else if (ShuffleKind
== 0) // normal
1607 return isVMerge(N
, UnitSize
, 8, 24);
1613 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1614 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1615 /// The ShuffleKind distinguishes between big-endian merges with two
1616 /// different inputs (0), either-endian merges with two identical inputs (1),
1617 /// and little-endian merges with two different inputs (2). For the latter,
1618 /// the input operands are swapped (see PPCInstrAltivec.td).
1619 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1620 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1621 if (DAG
.getDataLayout().isLittleEndian()) {
1622 if (ShuffleKind
== 1) // unary
1623 return isVMerge(N
, UnitSize
, 8, 8);
1624 else if (ShuffleKind
== 2) // swapped
1625 return isVMerge(N
, UnitSize
, 8, 24);
1629 if (ShuffleKind
== 1) // unary
1630 return isVMerge(N
, UnitSize
, 0, 0);
1631 else if (ShuffleKind
== 0) // normal
1632 return isVMerge(N
, UnitSize
, 0, 16);
1639 * Common function used to match vmrgew and vmrgow shuffles
1641 * The indexOffset determines whether to look for even or odd words in
1642 * the shuffle mask. This is based on the of the endianness of the target
1645 * - Use offset of 0 to check for odd elements
1646 * - Use offset of 4 to check for even elements
1648 * - Use offset of 0 to check for even elements
1649 * - Use offset of 4 to check for odd elements
1650 * A detailed description of the vector element ordering for little endian and
1651 * big endian can be found at
1652 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1653 * Targeting your applications - what little endian and big endian IBM XL C/C++
1654 * compiler differences mean to you
1656 * The mask to the shuffle vector instruction specifies the indices of the
1657 * elements from the two input vectors to place in the result. The elements are
1658 * numbered in array-access order, starting with the first vector. These vectors
1659 * are always of type v16i8, thus each vector will contain 16 elements of size
1660 * 8. More info on the shuffle vector can be found in the
1661 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1662 * Language Reference.
1664 * The RHSStartValue indicates whether the same input vectors are used (unary)
1665 * or two different input vectors are used, based on the following:
1666 * - If the instruction uses the same vector for both inputs, the range of the
1667 * indices will be 0 to 15. In this case, the RHSStart value passed should
1669 * - If the instruction has two different vectors then the range of the
1670 * indices will be 0 to 31. In this case, the RHSStart value passed should
1671 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1672 * to 31 specify elements in the second vector).
1674 * \param[in] N The shuffle vector SD Node to analyze
1675 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1676 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1677 * vector to the shuffle_vector instruction
1678 * \return true iff this shuffle vector represents an even or odd word merge
1680 static bool isVMerge(ShuffleVectorSDNode
*N
, unsigned IndexOffset
,
1681 unsigned RHSStartValue
) {
1682 if (N
->getValueType(0) != MVT::v16i8
)
1685 for (unsigned i
= 0; i
< 2; ++i
)
1686 for (unsigned j
= 0; j
< 4; ++j
)
1687 if (!isConstantOrUndef(N
->getMaskElt(i
*4+j
),
1688 i
*RHSStartValue
+j
+IndexOffset
) ||
1689 !isConstantOrUndef(N
->getMaskElt(i
*4+j
+8),
1690 i
*RHSStartValue
+j
+IndexOffset
+8))
1696 * Determine if the specified shuffle mask is suitable for the vmrgew or
1697 * vmrgow instructions.
1699 * \param[in] N The shuffle vector SD Node to analyze
1700 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1701 * \param[in] ShuffleKind Identify the type of merge:
1702 * - 0 = big-endian merge with two different inputs;
1703 * - 1 = either-endian merge with two identical inputs;
1704 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1705 * little-endian merges).
1706 * \param[in] DAG The current SelectionDAG
1707 * \return true iff this shuffle mask
1709 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode
*N
, bool CheckEven
,
1710 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1711 if (DAG
.getDataLayout().isLittleEndian()) {
1712 unsigned indexOffset
= CheckEven
? 4 : 0;
1713 if (ShuffleKind
== 1) // Unary
1714 return isVMerge(N
, indexOffset
, 0);
1715 else if (ShuffleKind
== 2) // swapped
1716 return isVMerge(N
, indexOffset
, 16);
1721 unsigned indexOffset
= CheckEven
? 0 : 4;
1722 if (ShuffleKind
== 1) // Unary
1723 return isVMerge(N
, indexOffset
, 0);
1724 else if (ShuffleKind
== 0) // Normal
1725 return isVMerge(N
, indexOffset
, 16);
1732 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1733 /// amount, otherwise return -1.
1734 /// The ShuffleKind distinguishes between big-endian operations with two
1735 /// different inputs (0), either-endian operations with two identical inputs
1736 /// (1), and little-endian operations with two different inputs (2). For the
1737 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1738 int PPC::isVSLDOIShuffleMask(SDNode
*N
, unsigned ShuffleKind
,
1739 SelectionDAG
&DAG
) {
1740 if (N
->getValueType(0) != MVT::v16i8
)
1743 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
1745 // Find the first non-undef value in the shuffle mask.
1747 for (i
= 0; i
!= 16 && SVOp
->getMaskElt(i
) < 0; ++i
)
1750 if (i
== 16) return -1; // all undef.
1752 // Otherwise, check to see if the rest of the elements are consecutively
1753 // numbered from this value.
1754 unsigned ShiftAmt
= SVOp
->getMaskElt(i
);
1755 if (ShiftAmt
< i
) return -1;
1758 bool isLE
= DAG
.getDataLayout().isLittleEndian();
1760 if ((ShuffleKind
== 0 && !isLE
) || (ShuffleKind
== 2 && isLE
)) {
1761 // Check the rest of the elements to see if they are consecutive.
1762 for (++i
; i
!= 16; ++i
)
1763 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), ShiftAmt
+i
))
1765 } else if (ShuffleKind
== 1) {
1766 // Check the rest of the elements to see if they are consecutive.
1767 for (++i
; i
!= 16; ++i
)
1768 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), (ShiftAmt
+i
) & 15))
1774 ShiftAmt
= 16 - ShiftAmt
;
1779 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1780 /// specifies a splat of a single element that is suitable for input to
1781 /// VSPLTB/VSPLTH/VSPLTW.
1782 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode
*N
, unsigned EltSize
) {
1783 assert(N
->getValueType(0) == MVT::v16i8
&&
1784 (EltSize
== 1 || EltSize
== 2 || EltSize
== 4));
1786 // The consecutive indices need to specify an element, not part of two
1787 // different elements. So abandon ship early if this isn't the case.
1788 if (N
->getMaskElt(0) % EltSize
!= 0)
1791 // This is a splat operation if each element of the permute is the same, and
1792 // if the value doesn't reference the second vector.
1793 unsigned ElementBase
= N
->getMaskElt(0);
1795 // FIXME: Handle UNDEF elements too!
1796 if (ElementBase
>= 16)
1799 // Check that the indices are consecutive, in the case of a multi-byte element
1800 // splatted with a v16i8 mask.
1801 for (unsigned i
= 1; i
!= EltSize
; ++i
)
1802 if (N
->getMaskElt(i
) < 0 || N
->getMaskElt(i
) != (int)(i
+ElementBase
))
1805 for (unsigned i
= EltSize
, e
= 16; i
!= e
; i
+= EltSize
) {
1806 if (N
->getMaskElt(i
) < 0) continue;
1807 for (unsigned j
= 0; j
!= EltSize
; ++j
)
1808 if (N
->getMaskElt(i
+j
) != N
->getMaskElt(j
))
1814 /// Check that the mask is shuffling N byte elements. Within each N byte
1815 /// element of the mask, the indices could be either in increasing or
1816 /// decreasing order as long as they are consecutive.
1817 /// \param[in] N the shuffle vector SD Node to analyze
1818 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1819 /// Word/DoubleWord/QuadWord).
1820 /// \param[in] StepLen the delta indices number among the N byte element, if
1821 /// the mask is in increasing/decreasing order then it is 1/-1.
1822 /// \return true iff the mask is shuffling N byte elements.
1823 static bool isNByteElemShuffleMask(ShuffleVectorSDNode
*N
, unsigned Width
,
1825 assert((Width
== 2 || Width
== 4 || Width
== 8 || Width
== 16) &&
1826 "Unexpected element width.");
1827 assert((StepLen
== 1 || StepLen
== -1) && "Unexpected element width.");
1829 unsigned NumOfElem
= 16 / Width
;
1830 unsigned MaskVal
[16]; // Width is never greater than 16
1831 for (unsigned i
= 0; i
< NumOfElem
; ++i
) {
1832 MaskVal
[0] = N
->getMaskElt(i
* Width
);
1833 if ((StepLen
== 1) && (MaskVal
[0] % Width
)) {
1835 } else if ((StepLen
== -1) && ((MaskVal
[0] + 1) % Width
)) {
1839 for (unsigned int j
= 1; j
< Width
; ++j
) {
1840 MaskVal
[j
] = N
->getMaskElt(i
* Width
+ j
);
1841 if (MaskVal
[j
] != MaskVal
[j
-1] + StepLen
) {
1850 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode
*N
, unsigned &ShiftElts
,
1851 unsigned &InsertAtByte
, bool &Swap
, bool IsLE
) {
1852 if (!isNByteElemShuffleMask(N
, 4, 1))
1855 // Now we look at mask elements 0,4,8,12
1856 unsigned M0
= N
->getMaskElt(0) / 4;
1857 unsigned M1
= N
->getMaskElt(4) / 4;
1858 unsigned M2
= N
->getMaskElt(8) / 4;
1859 unsigned M3
= N
->getMaskElt(12) / 4;
1860 unsigned LittleEndianShifts
[] = { 2, 1, 0, 3 };
1861 unsigned BigEndianShifts
[] = { 3, 0, 1, 2 };
1863 // Below, let H and L be arbitrary elements of the shuffle mask
1864 // where H is in the range [4,7] and L is in the range [0,3].
1865 // H, 1, 2, 3 or L, 5, 6, 7
1866 if ((M0
> 3 && M1
== 1 && M2
== 2 && M3
== 3) ||
1867 (M0
< 4 && M1
== 5 && M2
== 6 && M3
== 7)) {
1868 ShiftElts
= IsLE
? LittleEndianShifts
[M0
& 0x3] : BigEndianShifts
[M0
& 0x3];
1869 InsertAtByte
= IsLE
? 12 : 0;
1873 // 0, H, 2, 3 or 4, L, 6, 7
1874 if ((M1
> 3 && M0
== 0 && M2
== 2 && M3
== 3) ||
1875 (M1
< 4 && M0
== 4 && M2
== 6 && M3
== 7)) {
1876 ShiftElts
= IsLE
? LittleEndianShifts
[M1
& 0x3] : BigEndianShifts
[M1
& 0x3];
1877 InsertAtByte
= IsLE
? 8 : 4;
1881 // 0, 1, H, 3 or 4, 5, L, 7
1882 if ((M2
> 3 && M0
== 0 && M1
== 1 && M3
== 3) ||
1883 (M2
< 4 && M0
== 4 && M1
== 5 && M3
== 7)) {
1884 ShiftElts
= IsLE
? LittleEndianShifts
[M2
& 0x3] : BigEndianShifts
[M2
& 0x3];
1885 InsertAtByte
= IsLE
? 4 : 8;
1889 // 0, 1, 2, H or 4, 5, 6, L
1890 if ((M3
> 3 && M0
== 0 && M1
== 1 && M2
== 2) ||
1891 (M3
< 4 && M0
== 4 && M1
== 5 && M2
== 6)) {
1892 ShiftElts
= IsLE
? LittleEndianShifts
[M3
& 0x3] : BigEndianShifts
[M3
& 0x3];
1893 InsertAtByte
= IsLE
? 0 : 12;
1898 // If both vector operands for the shuffle are the same vector, the mask will
1899 // contain only elements from the first one and the second one will be undef.
1900 if (N
->getOperand(1).isUndef()) {
1903 unsigned XXINSERTWSrcElem
= IsLE
? 2 : 1;
1904 if (M0
== XXINSERTWSrcElem
&& M1
== 1 && M2
== 2 && M3
== 3) {
1905 InsertAtByte
= IsLE
? 12 : 0;
1908 if (M0
== 0 && M1
== XXINSERTWSrcElem
&& M2
== 2 && M3
== 3) {
1909 InsertAtByte
= IsLE
? 8 : 4;
1912 if (M0
== 0 && M1
== 1 && M2
== XXINSERTWSrcElem
&& M3
== 3) {
1913 InsertAtByte
= IsLE
? 4 : 8;
1916 if (M0
== 0 && M1
== 1 && M2
== 2 && M3
== XXINSERTWSrcElem
) {
1917 InsertAtByte
= IsLE
? 0 : 12;
1925 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode
*N
, unsigned &ShiftElts
,
1926 bool &Swap
, bool IsLE
) {
1927 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
1928 // Ensure each byte index of the word is consecutive.
1929 if (!isNByteElemShuffleMask(N
, 4, 1))
1932 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1933 unsigned M0
= N
->getMaskElt(0) / 4;
1934 unsigned M1
= N
->getMaskElt(4) / 4;
1935 unsigned M2
= N
->getMaskElt(8) / 4;
1936 unsigned M3
= N
->getMaskElt(12) / 4;
1938 // If both vector operands for the shuffle are the same vector, the mask will
1939 // contain only elements from the first one and the second one will be undef.
1940 if (N
->getOperand(1).isUndef()) {
1941 assert(M0
< 4 && "Indexing into an undef vector?");
1942 if (M1
!= (M0
+ 1) % 4 || M2
!= (M1
+ 1) % 4 || M3
!= (M2
+ 1) % 4)
1945 ShiftElts
= IsLE
? (4 - M0
) % 4 : M0
;
1950 // Ensure each word index of the ShuffleVector Mask is consecutive.
1951 if (M1
!= (M0
+ 1) % 8 || M2
!= (M1
+ 1) % 8 || M3
!= (M2
+ 1) % 8)
1955 if (M0
== 0 || M0
== 7 || M0
== 6 || M0
== 5) {
1956 // Input vectors don't need to be swapped if the leading element
1957 // of the result is one of the 3 left elements of the second vector
1958 // (or if there is no shift to be done at all).
1960 ShiftElts
= (8 - M0
) % 8;
1961 } else if (M0
== 4 || M0
== 3 || M0
== 2 || M0
== 1) {
1962 // Input vectors need to be swapped if the leading element
1963 // of the result is one of the 3 left elements of the first vector
1964 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1966 ShiftElts
= (4 - M0
) % 4;
1971 if (M0
== 0 || M0
== 1 || M0
== 2 || M0
== 3) {
1972 // Input vectors don't need to be swapped if the leading element
1973 // of the result is one of the 4 elements of the first vector.
1976 } else if (M0
== 4 || M0
== 5 || M0
== 6 || M0
== 7) {
1977 // Input vectors need to be swapped if the leading element
1978 // of the result is one of the 4 elements of the right vector.
1987 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode
*N
, int Width
) {
1988 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
1990 if (!isNByteElemShuffleMask(N
, Width
, -1))
1993 for (int i
= 0; i
< 16; i
+= Width
)
1994 if (N
->getMaskElt(i
) != i
+ Width
- 1)
2000 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode
*N
) {
2001 return isXXBRShuffleMaskHelper(N
, 2);
2004 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode
*N
) {
2005 return isXXBRShuffleMaskHelper(N
, 4);
2008 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode
*N
) {
2009 return isXXBRShuffleMaskHelper(N
, 8);
2012 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode
*N
) {
2013 return isXXBRShuffleMaskHelper(N
, 16);
2016 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2017 /// if the inputs to the instruction should be swapped and set \p DM to the
2018 /// value for the immediate.
2019 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2020 /// AND element 0 of the result comes from the first input (LE) or second input
2021 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2022 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2024 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode
*N
, unsigned &DM
,
2025 bool &Swap
, bool IsLE
) {
2026 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
2028 // Ensure each byte index of the double word is consecutive.
2029 if (!isNByteElemShuffleMask(N
, 8, 1))
2032 unsigned M0
= N
->getMaskElt(0) / 8;
2033 unsigned M1
= N
->getMaskElt(8) / 8;
2034 assert(((M0
| M1
) < 4) && "A mask element out of bounds?");
2036 // If both vector operands for the shuffle are the same vector, the mask will
2037 // contain only elements from the first one and the second one will be undef.
2038 if (N
->getOperand(1).isUndef()) {
2039 if ((M0
| M1
) < 2) {
2040 DM
= IsLE
? (((~M1
) & 1) << 1) + ((~M0
) & 1) : (M0
<< 1) + (M1
& 1);
2048 if (M0
> 1 && M1
< 2) {
2050 } else if (M0
< 2 && M1
> 1) {
2057 // Note: if control flow comes here that means Swap is already set above
2058 DM
= (((~M1
) & 1) << 1) + ((~M0
) & 1);
2061 if (M0
< 2 && M1
> 1) {
2063 } else if (M0
> 1 && M1
< 2) {
2070 // Note: if control flow comes here that means Swap is already set above
2071 DM
= (M0
<< 1) + (M1
& 1);
2077 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2078 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2079 unsigned PPC::getVSPLTImmediate(SDNode
*N
, unsigned EltSize
,
2080 SelectionDAG
&DAG
) {
2081 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
2082 assert(isSplatShuffleMask(SVOp
, EltSize
));
2083 if (DAG
.getDataLayout().isLittleEndian())
2084 return (16 / EltSize
) - 1 - (SVOp
->getMaskElt(0) / EltSize
);
2086 return SVOp
->getMaskElt(0) / EltSize
;
2089 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2090 /// by using a vspltis[bhw] instruction of the specified element size, return
2091 /// the constant being splatted. The ByteSize field indicates the number of
2092 /// bytes of each element [124] -> [bhw].
2093 SDValue
PPC::get_VSPLTI_elt(SDNode
*N
, unsigned ByteSize
, SelectionDAG
&DAG
) {
2094 SDValue
OpVal(nullptr, 0);
2096 // If ByteSize of the splat is bigger than the element size of the
2097 // build_vector, then we have a case where we are checking for a splat where
2098 // multiple elements of the buildvector are folded together into a single
2099 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2100 unsigned EltSize
= 16/N
->getNumOperands();
2101 if (EltSize
< ByteSize
) {
2102 unsigned Multiple
= ByteSize
/EltSize
; // Number of BV entries per spltval.
2103 SDValue UniquedVals
[4];
2104 assert(Multiple
> 1 && Multiple
<= 4 && "How can this happen?");
2106 // See if all of the elements in the buildvector agree across.
2107 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
2108 if (N
->getOperand(i
).isUndef()) continue;
2109 // If the element isn't a constant, bail fully out.
2110 if (!isa
<ConstantSDNode
>(N
->getOperand(i
))) return SDValue();
2112 if (!UniquedVals
[i
&(Multiple
-1)].getNode())
2113 UniquedVals
[i
&(Multiple
-1)] = N
->getOperand(i
);
2114 else if (UniquedVals
[i
&(Multiple
-1)] != N
->getOperand(i
))
2115 return SDValue(); // no match.
2118 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2119 // either constant or undef values that are identical for each chunk. See
2120 // if these chunks can form into a larger vspltis*.
2122 // Check to see if all of the leading entries are either 0 or -1. If
2123 // neither, then this won't fit into the immediate field.
2124 bool LeadingZero
= true;
2125 bool LeadingOnes
= true;
2126 for (unsigned i
= 0; i
!= Multiple
-1; ++i
) {
2127 if (!UniquedVals
[i
].getNode()) continue; // Must have been undefs.
2129 LeadingZero
&= isNullConstant(UniquedVals
[i
]);
2130 LeadingOnes
&= isAllOnesConstant(UniquedVals
[i
]);
2132 // Finally, check the least significant entry.
2134 if (!UniquedVals
[Multiple
-1].getNode())
2135 return DAG
.getTargetConstant(0, SDLoc(N
), MVT::i32
); // 0,0,0,undef
2136 int Val
= cast
<ConstantSDNode
>(UniquedVals
[Multiple
-1])->getZExtValue();
2137 if (Val
< 16) // 0,0,0,4 -> vspltisw(4)
2138 return DAG
.getTargetConstant(Val
, SDLoc(N
), MVT::i32
);
2141 if (!UniquedVals
[Multiple
-1].getNode())
2142 return DAG
.getTargetConstant(~0U, SDLoc(N
), MVT::i32
); // -1,-1,-1,undef
2143 int Val
=cast
<ConstantSDNode
>(UniquedVals
[Multiple
-1])->getSExtValue();
2144 if (Val
>= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2145 return DAG
.getTargetConstant(Val
, SDLoc(N
), MVT::i32
);
2151 // Check to see if this buildvec has a single non-undef value in its elements.
2152 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
2153 if (N
->getOperand(i
).isUndef()) continue;
2154 if (!OpVal
.getNode())
2155 OpVal
= N
->getOperand(i
);
2156 else if (OpVal
!= N
->getOperand(i
))
2160 if (!OpVal
.getNode()) return SDValue(); // All UNDEF: use implicit def.
2162 unsigned ValSizeInBytes
= EltSize
;
2164 if (ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(OpVal
)) {
2165 Value
= CN
->getZExtValue();
2166 } else if (ConstantFPSDNode
*CN
= dyn_cast
<ConstantFPSDNode
>(OpVal
)) {
2167 assert(CN
->getValueType(0) == MVT::f32
&& "Only one legal FP vector type!");
2168 Value
= FloatToBits(CN
->getValueAPF().convertToFloat());
2171 // If the splat value is larger than the element value, then we can never do
2172 // this splat. The only case that we could fit the replicated bits into our
2173 // immediate field for would be zero, and we prefer to use vxor for it.
2174 if (ValSizeInBytes
< ByteSize
) return SDValue();
2176 // If the element value is larger than the splat value, check if it consists
2177 // of a repeated bit pattern of size ByteSize.
2178 if (!APInt(ValSizeInBytes
* 8, Value
).isSplat(ByteSize
* 8))
2181 // Properly sign extend the value.
2182 int MaskVal
= SignExtend32(Value
, ByteSize
* 8);
2184 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2185 if (MaskVal
== 0) return SDValue();
2187 // Finally, if this value fits in a 5 bit sext field, return it
2188 if (SignExtend32
<5>(MaskVal
) == MaskVal
)
2189 return DAG
.getTargetConstant(MaskVal
, SDLoc(N
), MVT::i32
);
2193 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2194 /// amount, otherwise return -1.
2195 int PPC::isQVALIGNIShuffleMask(SDNode
*N
) {
2196 EVT VT
= N
->getValueType(0);
2197 if (VT
!= MVT::v4f64
&& VT
!= MVT::v4f32
&& VT
!= MVT::v4i1
)
2200 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
2202 // Find the first non-undef value in the shuffle mask.
2204 for (i
= 0; i
!= 4 && SVOp
->getMaskElt(i
) < 0; ++i
)
2207 if (i
== 4) return -1; // all undef.
2209 // Otherwise, check to see if the rest of the elements are consecutively
2210 // numbered from this value.
2211 unsigned ShiftAmt
= SVOp
->getMaskElt(i
);
2212 if (ShiftAmt
< i
) return -1;
2215 // Check the rest of the elements to see if they are consecutive.
2216 for (++i
; i
!= 4; ++i
)
2217 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), ShiftAmt
+i
))
2223 //===----------------------------------------------------------------------===//
2224 // Addressing Mode Selection
2225 //===----------------------------------------------------------------------===//
2227 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2228 /// or 64-bit immediate, and if the value can be accurately represented as a
2229 /// sign extension from a 16-bit value. If so, this returns true and the
2231 bool llvm::isIntS16Immediate(SDNode
*N
, int16_t &Imm
) {
2232 if (!isa
<ConstantSDNode
>(N
))
2235 Imm
= (int16_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2236 if (N
->getValueType(0) == MVT::i32
)
2237 return Imm
== (int32_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2239 return Imm
== (int64_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2241 bool llvm::isIntS16Immediate(SDValue Op
, int16_t &Imm
) {
2242 return isIntS16Immediate(Op
.getNode(), Imm
);
2246 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2247 /// be represented as an indexed [r+r] operation.
2248 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N
, SDValue
&Base
,
2250 SelectionDAG
&DAG
) const {
2251 for (SDNode::use_iterator UI
= N
->use_begin(), E
= N
->use_end();
2253 if (MemSDNode
*Memop
= dyn_cast
<MemSDNode
>(*UI
)) {
2254 if (Memop
->getMemoryVT() == MVT::f64
) {
2255 Base
= N
.getOperand(0);
2256 Index
= N
.getOperand(1);
2264 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2265 /// can be represented as an indexed [r+r] operation. Returns false if it
2266 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2267 /// non-zero and N can be represented by a base register plus a signed 16-bit
2268 /// displacement, make a more precise judgement by checking (displacement % \p
2269 /// EncodingAlignment).
2270 bool PPCTargetLowering::SelectAddressRegReg(SDValue N
, SDValue
&Base
,
2271 SDValue
&Index
, SelectionDAG
&DAG
,
2272 unsigned EncodingAlignment
) const {
2274 if (N
.getOpcode() == ISD::ADD
) {
2275 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2276 // SPE load/store can only handle 8-bit offsets.
2277 if (hasSPE() && SelectAddressEVXRegReg(N
, Base
, Index
, DAG
))
2279 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2280 (!EncodingAlignment
|| !(imm
% EncodingAlignment
)))
2281 return false; // r+i
2282 if (N
.getOperand(1).getOpcode() == PPCISD::Lo
)
2283 return false; // r+i
2285 Base
= N
.getOperand(0);
2286 Index
= N
.getOperand(1);
2288 } else if (N
.getOpcode() == ISD::OR
) {
2289 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2290 (!EncodingAlignment
|| !(imm
% EncodingAlignment
)))
2291 return false; // r+i can fold it if we can.
2293 // If this is an or of disjoint bitfields, we can codegen this as an add
2294 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2296 KnownBits LHSKnown
= DAG
.computeKnownBits(N
.getOperand(0));
2298 if (LHSKnown
.Zero
.getBoolValue()) {
2299 KnownBits RHSKnown
= DAG
.computeKnownBits(N
.getOperand(1));
2300 // If all of the bits are known zero on the LHS or RHS, the add won't
2302 if (~(LHSKnown
.Zero
| RHSKnown
.Zero
) == 0) {
2303 Base
= N
.getOperand(0);
2304 Index
= N
.getOperand(1);
2313 // If we happen to be doing an i64 load or store into a stack slot that has
2314 // less than a 4-byte alignment, then the frame-index elimination may need to
2315 // use an indexed load or store instruction (because the offset may not be a
2316 // multiple of 4). The extra register needed to hold the offset comes from the
2317 // register scavenger, and it is possible that the scavenger will need to use
2318 // an emergency spill slot. As a result, we need to make sure that a spill slot
2319 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2321 static void fixupFuncForFI(SelectionDAG
&DAG
, int FrameIdx
, EVT VT
) {
2322 // FIXME: This does not handle the LWA case.
2326 // NOTE: We'll exclude negative FIs here, which come from argument
2327 // lowering, because there are no known test cases triggering this problem
2328 // using packed structures (or similar). We can remove this exclusion if
2329 // we find such a test case. The reason why this is so test-case driven is
2330 // because this entire 'fixup' is only to prevent crashes (from the
2331 // register scavenger) on not-really-valid inputs. For example, if we have:
2333 // %b = bitcast i1* %a to i64*
2334 // store i64* a, i64 b
2335 // then the store should really be marked as 'align 1', but is not. If it
2336 // were marked as 'align 1' then the indexed form would have been
2337 // instruction-selected initially, and the problem this 'fixup' is preventing
2338 // won't happen regardless.
2342 MachineFunction
&MF
= DAG
.getMachineFunction();
2343 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2345 unsigned Align
= MFI
.getObjectAlignment(FrameIdx
);
2349 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
2350 FuncInfo
->setHasNonRISpills();
2353 /// Returns true if the address N can be represented by a base register plus
2354 /// a signed 16-bit displacement [r+imm], and if it is not better
2355 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2356 /// displacements that are multiples of that value.
2357 bool PPCTargetLowering::SelectAddressRegImm(SDValue N
, SDValue
&Disp
,
2360 unsigned EncodingAlignment
) const {
2361 // FIXME dl should come from parent load or store, not from address
2363 // If this can be more profitably realized as r+r, fail.
2364 if (SelectAddressRegReg(N
, Disp
, Base
, DAG
, EncodingAlignment
))
2367 if (N
.getOpcode() == ISD::ADD
) {
2369 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2370 (!EncodingAlignment
|| (imm
% EncodingAlignment
) == 0)) {
2371 Disp
= DAG
.getTargetConstant(imm
, dl
, N
.getValueType());
2372 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(N
.getOperand(0))) {
2373 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2374 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2376 Base
= N
.getOperand(0);
2378 return true; // [r+i]
2379 } else if (N
.getOperand(1).getOpcode() == PPCISD::Lo
) {
2380 // Match LOAD (ADD (X, Lo(G))).
2381 assert(!cast
<ConstantSDNode
>(N
.getOperand(1).getOperand(1))->getZExtValue()
2382 && "Cannot handle constant offsets yet!");
2383 Disp
= N
.getOperand(1).getOperand(0); // The global address.
2384 assert(Disp
.getOpcode() == ISD::TargetGlobalAddress
||
2385 Disp
.getOpcode() == ISD::TargetGlobalTLSAddress
||
2386 Disp
.getOpcode() == ISD::TargetConstantPool
||
2387 Disp
.getOpcode() == ISD::TargetJumpTable
);
2388 Base
= N
.getOperand(0);
2389 return true; // [&g+r]
2391 } else if (N
.getOpcode() == ISD::OR
) {
2393 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2394 (!EncodingAlignment
|| (imm
% EncodingAlignment
) == 0)) {
2395 // If this is an or of disjoint bitfields, we can codegen this as an add
2396 // (for better address arithmetic) if the LHS and RHS of the OR are
2397 // provably disjoint.
2398 KnownBits LHSKnown
= DAG
.computeKnownBits(N
.getOperand(0));
2400 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)imm
) == ~0ULL) {
2401 // If all of the bits are known zero on the LHS or RHS, the add won't
2403 if (FrameIndexSDNode
*FI
=
2404 dyn_cast
<FrameIndexSDNode
>(N
.getOperand(0))) {
2405 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2406 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2408 Base
= N
.getOperand(0);
2410 Disp
= DAG
.getTargetConstant(imm
, dl
, N
.getValueType());
2414 } else if (ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(N
)) {
2415 // Loading from a constant address.
2417 // If this address fits entirely in a 16-bit sext immediate field, codegen
2420 if (isIntS16Immediate(CN
, Imm
) &&
2421 (!EncodingAlignment
|| (Imm
% EncodingAlignment
) == 0)) {
2422 Disp
= DAG
.getTargetConstant(Imm
, dl
, CN
->getValueType(0));
2423 Base
= DAG
.getRegister(Subtarget
.isPPC64() ? PPC::ZERO8
: PPC::ZERO
,
2424 CN
->getValueType(0));
2428 // Handle 32-bit sext immediates with LIS + addr mode.
2429 if ((CN
->getValueType(0) == MVT::i32
||
2430 (int64_t)CN
->getZExtValue() == (int)CN
->getZExtValue()) &&
2431 (!EncodingAlignment
|| (CN
->getZExtValue() % EncodingAlignment
) == 0)) {
2432 int Addr
= (int)CN
->getZExtValue();
2434 // Otherwise, break this down into an LIS + disp.
2435 Disp
= DAG
.getTargetConstant((short)Addr
, dl
, MVT::i32
);
2437 Base
= DAG
.getTargetConstant((Addr
- (signed short)Addr
) >> 16, dl
,
2439 unsigned Opc
= CN
->getValueType(0) == MVT::i32
? PPC::LIS
: PPC::LIS8
;
2440 Base
= SDValue(DAG
.getMachineNode(Opc
, dl
, CN
->getValueType(0), Base
), 0);
2445 Disp
= DAG
.getTargetConstant(0, dl
, getPointerTy(DAG
.getDataLayout()));
2446 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(N
)) {
2447 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2448 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2451 return true; // [r+0]
2454 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2455 /// represented as an indexed [r+r] operation.
2456 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N
, SDValue
&Base
,
2458 SelectionDAG
&DAG
) const {
2459 // Check to see if we can easily represent this as an [r+r] address. This
2460 // will fail if it thinks that the address is more profitably represented as
2461 // reg+imm, e.g. where imm = 0.
2462 if (SelectAddressRegReg(N
, Base
, Index
, DAG
))
2465 // If the address is the result of an add, we will utilize the fact that the
2466 // address calculation includes an implicit add. However, we can reduce
2467 // register pressure if we do not materialize a constant just for use as the
2468 // index register. We only get rid of the add if it is not an add of a
2469 // value and a 16-bit signed constant and both have a single use.
2471 if (N
.getOpcode() == ISD::ADD
&&
2472 (!isIntS16Immediate(N
.getOperand(1), imm
) ||
2473 !N
.getOperand(1).hasOneUse() || !N
.getOperand(0).hasOneUse())) {
2474 Base
= N
.getOperand(0);
2475 Index
= N
.getOperand(1);
2479 // Otherwise, do it the hard way, using R0 as the base register.
2480 Base
= DAG
.getRegister(Subtarget
.isPPC64() ? PPC::ZERO8
: PPC::ZERO
,
2486 /// Returns true if we should use a direct load into vector instruction
2487 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2488 static bool usePartialVectorLoads(SDNode
*N
, const PPCSubtarget
& ST
) {
2490 // If there are any other uses other than scalar to vector, then we should
2491 // keep it as a scalar load -> direct move pattern to prevent multiple
2493 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
);
2497 EVT MemVT
= LD
->getMemoryVT();
2498 if (!MemVT
.isSimple())
2500 switch(MemVT
.getSimpleVT().SimpleTy
) {
2504 if (!ST
.hasP8Vector())
2509 if (!ST
.hasP9Vector())
2516 SDValue
LoadedVal(N
, 0);
2517 if (!LoadedVal
.hasOneUse())
2520 for (SDNode::use_iterator UI
= LD
->use_begin(), UE
= LD
->use_end();
2522 if (UI
.getUse().get().getResNo() == 0 &&
2523 UI
->getOpcode() != ISD::SCALAR_TO_VECTOR
)
2529 /// getPreIndexedAddressParts - returns true by value, base pointer and
2530 /// offset pointer and addressing mode by reference if the node's address
2531 /// can be legally represented as pre-indexed load / store address.
2532 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode
*N
, SDValue
&Base
,
2534 ISD::MemIndexedMode
&AM
,
2535 SelectionDAG
&DAG
) const {
2536 if (DisablePPCPreinc
) return false;
2542 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
2543 Ptr
= LD
->getBasePtr();
2544 VT
= LD
->getMemoryVT();
2545 Alignment
= LD
->getAlignment();
2546 } else if (StoreSDNode
*ST
= dyn_cast
<StoreSDNode
>(N
)) {
2547 Ptr
= ST
->getBasePtr();
2548 VT
= ST
->getMemoryVT();
2549 Alignment
= ST
->getAlignment();
2554 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2555 // instructions because we can fold these into a more efficient instruction
2556 // instead, (such as LXSD).
2557 if (isLoad
&& usePartialVectorLoads(N
, Subtarget
)) {
2561 // PowerPC doesn't have preinc load/store instructions for vectors (except
2562 // for QPX, which does have preinc r+r forms).
2563 if (VT
.isVector()) {
2564 if (!Subtarget
.hasQPX() || (VT
!= MVT::v4f64
&& VT
!= MVT::v4f32
)) {
2566 } else if (SelectAddressRegRegOnly(Ptr
, Offset
, Base
, DAG
)) {
2572 if (SelectAddressRegReg(Ptr
, Base
, Offset
, DAG
)) {
2573 // Common code will reject creating a pre-inc form if the base pointer
2574 // is a frame index, or if N is a store and the base pointer is either
2575 // the same as or a predecessor of the value being stored. Check for
2576 // those situations here, and try with swapped Base/Offset instead.
2579 if (isa
<FrameIndexSDNode
>(Base
) || isa
<RegisterSDNode
>(Base
))
2582 SDValue Val
= cast
<StoreSDNode
>(N
)->getValue();
2583 if (Val
== Base
|| Base
.getNode()->isPredecessorOf(Val
.getNode()))
2588 std::swap(Base
, Offset
);
2594 // LDU/STU can only handle immediates that are a multiple of 4.
2595 if (VT
!= MVT::i64
) {
2596 if (!SelectAddressRegImm(Ptr
, Offset
, Base
, DAG
, 0))
2599 // LDU/STU need an address with at least 4-byte alignment.
2603 if (!SelectAddressRegImm(Ptr
, Offset
, Base
, DAG
, 4))
2607 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
2608 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
2609 // sext i32 to i64 when addr mode is r+i.
2610 if (LD
->getValueType(0) == MVT::i64
&& LD
->getMemoryVT() == MVT::i32
&&
2611 LD
->getExtensionType() == ISD::SEXTLOAD
&&
2612 isa
<ConstantSDNode
>(Offset
))
2620 //===----------------------------------------------------------------------===//
2621 // LowerOperation implementation
2622 //===----------------------------------------------------------------------===//
2624 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
2625 /// and LoOpFlags to the target MO flags.
2626 static void getLabelAccessInfo(bool IsPIC
, const PPCSubtarget
&Subtarget
,
2627 unsigned &HiOpFlags
, unsigned &LoOpFlags
,
2628 const GlobalValue
*GV
= nullptr) {
2629 HiOpFlags
= PPCII::MO_HA
;
2630 LoOpFlags
= PPCII::MO_LO
;
2632 // Don't use the pic base if not in PIC relocation model.
2634 HiOpFlags
|= PPCII::MO_PIC_FLAG
;
2635 LoOpFlags
|= PPCII::MO_PIC_FLAG
;
2638 // If this is a reference to a global value that requires a non-lazy-ptr, make
2639 // sure that instruction lowering adds it.
2640 if (GV
&& Subtarget
.hasLazyResolverStub(GV
)) {
2641 HiOpFlags
|= PPCII::MO_NLP_FLAG
;
2642 LoOpFlags
|= PPCII::MO_NLP_FLAG
;
2644 if (GV
->hasHiddenVisibility()) {
2645 HiOpFlags
|= PPCII::MO_NLP_HIDDEN_FLAG
;
2646 LoOpFlags
|= PPCII::MO_NLP_HIDDEN_FLAG
;
2651 static SDValue
LowerLabelRef(SDValue HiPart
, SDValue LoPart
, bool isPIC
,
2652 SelectionDAG
&DAG
) {
2654 EVT PtrVT
= HiPart
.getValueType();
2655 SDValue Zero
= DAG
.getConstant(0, DL
, PtrVT
);
2657 SDValue Hi
= DAG
.getNode(PPCISD::Hi
, DL
, PtrVT
, HiPart
, Zero
);
2658 SDValue Lo
= DAG
.getNode(PPCISD::Lo
, DL
, PtrVT
, LoPart
, Zero
);
2660 // With PIC, the first instruction is actually "GR+hi(&G)".
2662 Hi
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
,
2663 DAG
.getNode(PPCISD::GlobalBaseReg
, DL
, PtrVT
), Hi
);
2665 // Generate non-pic code that has direct accesses to the constant pool.
2666 // The address of the global is just (hi(&g)+lo(&g)).
2667 return DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Hi
, Lo
);
2670 static void setUsesTOCBasePtr(MachineFunction
&MF
) {
2671 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
2672 FuncInfo
->setUsesTOCBasePtr();
2675 static void setUsesTOCBasePtr(SelectionDAG
&DAG
) {
2676 setUsesTOCBasePtr(DAG
.getMachineFunction());
2679 SDValue
PPCTargetLowering::getTOCEntry(SelectionDAG
&DAG
, const SDLoc
&dl
,
2681 const bool Is64Bit
= Subtarget
.isPPC64();
2682 EVT VT
= Is64Bit
? MVT::i64
: MVT::i32
;
2683 SDValue Reg
= Is64Bit
? DAG
.getRegister(PPC::X2
, VT
)
2684 : Subtarget
.isAIXABI()
2685 ? DAG
.getRegister(PPC::R2
, VT
)
2686 : DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, VT
);
2687 SDValue Ops
[] = { GA
, Reg
};
2688 return DAG
.getMemIntrinsicNode(
2689 PPCISD::TOC_ENTRY
, dl
, DAG
.getVTList(VT
, MVT::Other
), Ops
, VT
,
2690 MachinePointerInfo::getGOT(DAG
.getMachineFunction()), 0,
2691 MachineMemOperand::MOLoad
);
2694 SDValue
PPCTargetLowering::LowerConstantPool(SDValue Op
,
2695 SelectionDAG
&DAG
) const {
2696 EVT PtrVT
= Op
.getValueType();
2697 ConstantPoolSDNode
*CP
= cast
<ConstantPoolSDNode
>(Op
);
2698 const Constant
*C
= CP
->getConstVal();
2700 // 64-bit SVR4 ABI code is always position-independent.
2701 // The actual address of the GlobalValue is stored in the TOC.
2702 if (Subtarget
.is64BitELFABI()) {
2703 setUsesTOCBasePtr(DAG
);
2704 SDValue GA
= DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0);
2705 return getTOCEntry(DAG
, SDLoc(CP
), GA
);
2708 unsigned MOHiFlag
, MOLoFlag
;
2709 bool IsPIC
= isPositionIndependent();
2710 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2712 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2713 SDValue GA
= DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(),
2714 PPCII::MO_PIC_FLAG
);
2715 return getTOCEntry(DAG
, SDLoc(CP
), GA
);
2719 DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0, MOHiFlag
);
2721 DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0, MOLoFlag
);
2722 return LowerLabelRef(CPIHi
, CPILo
, IsPIC
, DAG
);
2725 // For 64-bit PowerPC, prefer the more compact relative encodings.
2726 // This trades 32 bits per jump table entry for one or two instructions
2727 // on the jump site.
2728 unsigned PPCTargetLowering::getJumpTableEncoding() const {
2729 if (isJumpTableRelative())
2730 return MachineJumpTableInfo::EK_LabelDifference32
;
2732 return TargetLowering::getJumpTableEncoding();
2735 bool PPCTargetLowering::isJumpTableRelative() const {
2736 if (Subtarget
.isPPC64())
2738 return TargetLowering::isJumpTableRelative();
2741 SDValue
PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table
,
2742 SelectionDAG
&DAG
) const {
2743 if (!Subtarget
.isPPC64())
2744 return TargetLowering::getPICJumpTableRelocBase(Table
, DAG
);
2746 switch (getTargetMachine().getCodeModel()) {
2747 case CodeModel::Small
:
2748 case CodeModel::Medium
:
2749 return TargetLowering::getPICJumpTableRelocBase(Table
, DAG
);
2751 return DAG
.getNode(PPCISD::GlobalBaseReg
, SDLoc(),
2752 getPointerTy(DAG
.getDataLayout()));
2757 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction
*MF
,
2759 MCContext
&Ctx
) const {
2760 if (!Subtarget
.isPPC64())
2761 return TargetLowering::getPICJumpTableRelocBaseExpr(MF
, JTI
, Ctx
);
2763 switch (getTargetMachine().getCodeModel()) {
2764 case CodeModel::Small
:
2765 case CodeModel::Medium
:
2766 return TargetLowering::getPICJumpTableRelocBaseExpr(MF
, JTI
, Ctx
);
2768 return MCSymbolRefExpr::create(MF
->getPICBaseSymbol(), Ctx
);
2772 SDValue
PPCTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const {
2773 EVT PtrVT
= Op
.getValueType();
2774 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Op
);
2776 // 64-bit SVR4 ABI code is always position-independent.
2777 // The actual address of the GlobalValue is stored in the TOC.
2778 if (Subtarget
.is64BitELFABI()) {
2779 setUsesTOCBasePtr(DAG
);
2780 SDValue GA
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
);
2781 return getTOCEntry(DAG
, SDLoc(JT
), GA
);
2784 unsigned MOHiFlag
, MOLoFlag
;
2785 bool IsPIC
= isPositionIndependent();
2786 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2788 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2789 SDValue GA
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
,
2790 PPCII::MO_PIC_FLAG
);
2791 return getTOCEntry(DAG
, SDLoc(GA
), GA
);
2794 SDValue JTIHi
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
, MOHiFlag
);
2795 SDValue JTILo
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
, MOLoFlag
);
2796 return LowerLabelRef(JTIHi
, JTILo
, IsPIC
, DAG
);
2799 SDValue
PPCTargetLowering::LowerBlockAddress(SDValue Op
,
2800 SelectionDAG
&DAG
) const {
2801 EVT PtrVT
= Op
.getValueType();
2802 BlockAddressSDNode
*BASDN
= cast
<BlockAddressSDNode
>(Op
);
2803 const BlockAddress
*BA
= BASDN
->getBlockAddress();
2805 // 64-bit SVR4 ABI code is always position-independent.
2806 // The actual BlockAddress is stored in the TOC.
2807 if (Subtarget
.is64BitELFABI()) {
2808 setUsesTOCBasePtr(DAG
);
2809 SDValue GA
= DAG
.getTargetBlockAddress(BA
, PtrVT
, BASDN
->getOffset());
2810 return getTOCEntry(DAG
, SDLoc(BASDN
), GA
);
2813 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2814 if (Subtarget
.is32BitELFABI() && isPositionIndependent())
2817 DAG
.getTargetBlockAddress(BA
, PtrVT
, BASDN
->getOffset()));
2819 unsigned MOHiFlag
, MOLoFlag
;
2820 bool IsPIC
= isPositionIndependent();
2821 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2822 SDValue TgtBAHi
= DAG
.getTargetBlockAddress(BA
, PtrVT
, 0, MOHiFlag
);
2823 SDValue TgtBALo
= DAG
.getTargetBlockAddress(BA
, PtrVT
, 0, MOLoFlag
);
2824 return LowerLabelRef(TgtBAHi
, TgtBALo
, IsPIC
, DAG
);
2827 SDValue
PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op
,
2828 SelectionDAG
&DAG
) const {
2829 // FIXME: TLS addresses currently use medium model code sequences,
2830 // which is the most useful form. Eventually support for small and
2831 // large models could be added if users need it, at the cost of
2832 // additional complexity.
2833 GlobalAddressSDNode
*GA
= cast
<GlobalAddressSDNode
>(Op
);
2834 if (DAG
.getTarget().useEmulatedTLS())
2835 return LowerToTLSEmulatedModel(GA
, DAG
);
2838 const GlobalValue
*GV
= GA
->getGlobal();
2839 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2840 bool is64bit
= Subtarget
.isPPC64();
2841 const Module
*M
= DAG
.getMachineFunction().getFunction().getParent();
2842 PICLevel::Level picLevel
= M
->getPICLevel();
2844 const TargetMachine
&TM
= getTargetMachine();
2845 TLSModel::Model Model
= TM
.getTLSModel(GV
);
2847 if (Model
== TLSModel::LocalExec
) {
2848 SDValue TGAHi
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
2849 PPCII::MO_TPREL_HA
);
2850 SDValue TGALo
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
2851 PPCII::MO_TPREL_LO
);
2852 SDValue TLSReg
= is64bit
? DAG
.getRegister(PPC::X13
, MVT::i64
)
2853 : DAG
.getRegister(PPC::R2
, MVT::i32
);
2855 SDValue Hi
= DAG
.getNode(PPCISD::Hi
, dl
, PtrVT
, TGAHi
, TLSReg
);
2856 return DAG
.getNode(PPCISD::Lo
, dl
, PtrVT
, TGALo
, Hi
);
2859 if (Model
== TLSModel::InitialExec
) {
2860 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2861 SDValue TGATLS
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
2865 setUsesTOCBasePtr(DAG
);
2866 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2867 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_GOT_TPREL_HA
, dl
,
2868 PtrVT
, GOTReg
, TGA
);
2870 if (!TM
.isPositionIndependent())
2871 GOTPtr
= DAG
.getNode(PPCISD::PPC32_GOT
, dl
, PtrVT
);
2872 else if (picLevel
== PICLevel::SmallPIC
)
2873 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2875 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2877 SDValue TPOffset
= DAG
.getNode(PPCISD::LD_GOT_TPREL_L
, dl
,
2878 PtrVT
, TGA
, GOTPtr
);
2879 return DAG
.getNode(PPCISD::ADD_TLS
, dl
, PtrVT
, TPOffset
, TGATLS
);
2882 if (Model
== TLSModel::GeneralDynamic
) {
2883 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2886 setUsesTOCBasePtr(DAG
);
2887 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2888 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_TLSGD_HA
, dl
, PtrVT
,
2891 if (picLevel
== PICLevel::SmallPIC
)
2892 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2894 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2896 return DAG
.getNode(PPCISD::ADDI_TLSGD_L_ADDR
, dl
, PtrVT
,
2900 if (Model
== TLSModel::LocalDynamic
) {
2901 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2904 setUsesTOCBasePtr(DAG
);
2905 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2906 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_TLSLD_HA
, dl
, PtrVT
,
2909 if (picLevel
== PICLevel::SmallPIC
)
2910 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2912 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2914 SDValue TLSAddr
= DAG
.getNode(PPCISD::ADDI_TLSLD_L_ADDR
, dl
,
2915 PtrVT
, GOTPtr
, TGA
, TGA
);
2916 SDValue DtvOffsetHi
= DAG
.getNode(PPCISD::ADDIS_DTPREL_HA
, dl
,
2917 PtrVT
, TLSAddr
, TGA
);
2918 return DAG
.getNode(PPCISD::ADDI_DTPREL_L
, dl
, PtrVT
, DtvOffsetHi
, TGA
);
2921 llvm_unreachable("Unknown TLS model!");
2924 SDValue
PPCTargetLowering::LowerGlobalAddress(SDValue Op
,
2925 SelectionDAG
&DAG
) const {
2926 EVT PtrVT
= Op
.getValueType();
2927 GlobalAddressSDNode
*GSDN
= cast
<GlobalAddressSDNode
>(Op
);
2929 const GlobalValue
*GV
= GSDN
->getGlobal();
2931 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
2932 // The actual address of the GlobalValue is stored in the TOC.
2933 if (Subtarget
.is64BitELFABI() || Subtarget
.isAIXABI()) {
2934 setUsesTOCBasePtr(DAG
);
2935 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset());
2936 return getTOCEntry(DAG
, DL
, GA
);
2939 unsigned MOHiFlag
, MOLoFlag
;
2940 bool IsPIC
= isPositionIndependent();
2941 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
, GV
);
2943 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2944 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
,
2946 PPCII::MO_PIC_FLAG
);
2947 return getTOCEntry(DAG
, DL
, GA
);
2951 DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset(), MOHiFlag
);
2953 DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset(), MOLoFlag
);
2955 SDValue Ptr
= LowerLabelRef(GAHi
, GALo
, IsPIC
, DAG
);
2957 // If the global reference is actually to a non-lazy-pointer, we have to do an
2958 // extra load to get the address of the global.
2959 if (MOHiFlag
& PPCII::MO_NLP_FLAG
)
2960 Ptr
= DAG
.getLoad(PtrVT
, DL
, DAG
.getEntryNode(), Ptr
, MachinePointerInfo());
2964 SDValue
PPCTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const {
2965 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
2968 if (Op
.getValueType() == MVT::v2i64
) {
2969 // When the operands themselves are v2i64 values, we need to do something
2970 // special because VSX has no underlying comparison operations for these.
2971 if (Op
.getOperand(0).getValueType() == MVT::v2i64
) {
2972 // Equality can be handled by casting to the legal type for Altivec
2973 // comparisons, everything else needs to be expanded.
2974 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
2975 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
,
2976 DAG
.getSetCC(dl
, MVT::v4i32
,
2977 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, Op
.getOperand(0)),
2978 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, Op
.getOperand(1)),
2985 // We handle most of these in the usual way.
2989 // If we're comparing for equality to zero, expose the fact that this is
2990 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2991 // fold the new nodes.
2992 if (SDValue V
= lowerCmpEqZeroToCtlzSrl(Op
, DAG
))
2995 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1))) {
2996 // Leave comparisons against 0 and -1 alone for now, since they're usually
2997 // optimized. FIXME: revisit this when we can custom lower all setcc
2999 if (C
->isAllOnesValue() || C
->isNullValue())
3003 // If we have an integer seteq/setne, turn it into a compare against zero
3004 // by xor'ing the rhs with the lhs, which is faster than setting a
3005 // condition register, reading it back out, and masking the correct bit. The
3006 // normal approach here uses sub to do this instead of xor. Using xor exposes
3007 // the result to other bit-twiddling opportunities.
3008 EVT LHSVT
= Op
.getOperand(0).getValueType();
3009 if (LHSVT
.isInteger() && (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
)) {
3010 EVT VT
= Op
.getValueType();
3011 SDValue Sub
= DAG
.getNode(ISD::XOR
, dl
, LHSVT
, Op
.getOperand(0),
3013 return DAG
.getSetCC(dl
, VT
, Sub
, DAG
.getConstant(0, dl
, LHSVT
), CC
);
3018 SDValue
PPCTargetLowering::LowerVAARG(SDValue Op
, SelectionDAG
&DAG
) const {
3019 SDNode
*Node
= Op
.getNode();
3020 EVT VT
= Node
->getValueType(0);
3021 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3022 SDValue InChain
= Node
->getOperand(0);
3023 SDValue VAListPtr
= Node
->getOperand(1);
3024 const Value
*SV
= cast
<SrcValueSDNode
>(Node
->getOperand(2))->getValue();
3027 assert(!Subtarget
.isPPC64() && "LowerVAARG is PPC32 only");
3030 SDValue GprIndex
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, MVT::i32
, InChain
,
3031 VAListPtr
, MachinePointerInfo(SV
), MVT::i8
);
3032 InChain
= GprIndex
.getValue(1);
3034 if (VT
== MVT::i64
) {
3035 // Check if GprIndex is even
3036 SDValue GprAnd
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, GprIndex
,
3037 DAG
.getConstant(1, dl
, MVT::i32
));
3038 SDValue CC64
= DAG
.getSetCC(dl
, MVT::i32
, GprAnd
,
3039 DAG
.getConstant(0, dl
, MVT::i32
), ISD::SETNE
);
3040 SDValue GprIndexPlusOne
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, GprIndex
,
3041 DAG
.getConstant(1, dl
, MVT::i32
));
3042 // Align GprIndex to be even if it isn't
3043 GprIndex
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, CC64
, GprIndexPlusOne
,
3047 // fpr index is 1 byte after gpr
3048 SDValue FprPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3049 DAG
.getConstant(1, dl
, MVT::i32
));
3052 SDValue FprIndex
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, MVT::i32
, InChain
,
3053 FprPtr
, MachinePointerInfo(SV
), MVT::i8
);
3054 InChain
= FprIndex
.getValue(1);
3056 SDValue RegSaveAreaPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3057 DAG
.getConstant(8, dl
, MVT::i32
));
3059 SDValue OverflowAreaPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3060 DAG
.getConstant(4, dl
, MVT::i32
));
3063 SDValue OverflowArea
=
3064 DAG
.getLoad(MVT::i32
, dl
, InChain
, OverflowAreaPtr
, MachinePointerInfo());
3065 InChain
= OverflowArea
.getValue(1);
3067 SDValue RegSaveArea
=
3068 DAG
.getLoad(MVT::i32
, dl
, InChain
, RegSaveAreaPtr
, MachinePointerInfo());
3069 InChain
= RegSaveArea
.getValue(1);
3071 // select overflow_area if index > 8
3072 SDValue CC
= DAG
.getSetCC(dl
, MVT::i32
, VT
.isInteger() ? GprIndex
: FprIndex
,
3073 DAG
.getConstant(8, dl
, MVT::i32
), ISD::SETLT
);
3075 // adjustment constant gpr_index * 4/8
3076 SDValue RegConstant
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
,
3077 VT
.isInteger() ? GprIndex
: FprIndex
,
3078 DAG
.getConstant(VT
.isInteger() ? 4 : 8, dl
,
3081 // OurReg = RegSaveArea + RegConstant
3082 SDValue OurReg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, RegSaveArea
,
3085 // Floating types are 32 bytes into RegSaveArea
3086 if (VT
.isFloatingPoint())
3087 OurReg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, OurReg
,
3088 DAG
.getConstant(32, dl
, MVT::i32
));
3090 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3091 SDValue IndexPlus1
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
,
3092 VT
.isInteger() ? GprIndex
: FprIndex
,
3093 DAG
.getConstant(VT
== MVT::i64
? 2 : 1, dl
,
3096 InChain
= DAG
.getTruncStore(InChain
, dl
, IndexPlus1
,
3097 VT
.isInteger() ? VAListPtr
: FprPtr
,
3098 MachinePointerInfo(SV
), MVT::i8
);
3100 // determine if we should load from reg_save_area or overflow_area
3101 SDValue Result
= DAG
.getNode(ISD::SELECT
, dl
, PtrVT
, CC
, OurReg
, OverflowArea
);
3103 // increase overflow_area by 4/8 if gpr/fpr > 8
3104 SDValue OverflowAreaPlusN
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, OverflowArea
,
3105 DAG
.getConstant(VT
.isInteger() ? 4 : 8,
3108 OverflowArea
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, CC
, OverflowArea
,
3111 InChain
= DAG
.getTruncStore(InChain
, dl
, OverflowArea
, OverflowAreaPtr
,
3112 MachinePointerInfo(), MVT::i32
);
3114 return DAG
.getLoad(VT
, dl
, InChain
, Result
, MachinePointerInfo());
3117 SDValue
PPCTargetLowering::LowerVACOPY(SDValue Op
, SelectionDAG
&DAG
) const {
3118 assert(!Subtarget
.isPPC64() && "LowerVACOPY is PPC32 only");
3120 // We have to copy the entire va_list struct:
3121 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3122 return DAG
.getMemcpy(Op
.getOperand(0), Op
,
3123 Op
.getOperand(1), Op
.getOperand(2),
3124 DAG
.getConstant(12, SDLoc(Op
), MVT::i32
), 8, false, true,
3125 false, MachinePointerInfo(), MachinePointerInfo());
3128 SDValue
PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op
,
3129 SelectionDAG
&DAG
) const {
3130 return Op
.getOperand(0);
3133 SDValue
PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op
,
3134 SelectionDAG
&DAG
) const {
3135 SDValue Chain
= Op
.getOperand(0);
3136 SDValue Trmp
= Op
.getOperand(1); // trampoline
3137 SDValue FPtr
= Op
.getOperand(2); // nested function
3138 SDValue Nest
= Op
.getOperand(3); // 'nest' parameter value
3141 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3142 bool isPPC64
= (PtrVT
== MVT::i64
);
3143 Type
*IntPtrTy
= DAG
.getDataLayout().getIntPtrType(*DAG
.getContext());
3145 TargetLowering::ArgListTy Args
;
3146 TargetLowering::ArgListEntry Entry
;
3148 Entry
.Ty
= IntPtrTy
;
3149 Entry
.Node
= Trmp
; Args
.push_back(Entry
);
3151 // TrampSize == (isPPC64 ? 48 : 40);
3152 Entry
.Node
= DAG
.getConstant(isPPC64
? 48 : 40, dl
,
3153 isPPC64
? MVT::i64
: MVT::i32
);
3154 Args
.push_back(Entry
);
3156 Entry
.Node
= FPtr
; Args
.push_back(Entry
);
3157 Entry
.Node
= Nest
; Args
.push_back(Entry
);
3159 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3160 TargetLowering::CallLoweringInfo
CLI(DAG
);
3161 CLI
.setDebugLoc(dl
).setChain(Chain
).setLibCallee(
3162 CallingConv::C
, Type::getVoidTy(*DAG
.getContext()),
3163 DAG
.getExternalSymbol("__trampoline_setup", PtrVT
), std::move(Args
));
3165 std::pair
<SDValue
, SDValue
> CallResult
= LowerCallTo(CLI
);
3166 return CallResult
.second
;
3169 SDValue
PPCTargetLowering::LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const {
3170 MachineFunction
&MF
= DAG
.getMachineFunction();
3171 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3172 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3176 if (Subtarget
.isDarwinABI() || Subtarget
.isPPC64()) {
3177 // vastart just stores the address of the VarArgsFrameIndex slot into the
3178 // memory location argument.
3179 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
3180 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
3181 return DAG
.getStore(Op
.getOperand(0), dl
, FR
, Op
.getOperand(1),
3182 MachinePointerInfo(SV
));
3185 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3186 // We suppose the given va_list is already allocated.
3189 // char gpr; /* index into the array of 8 GPRs
3190 // * stored in the register save area
3191 // * gpr=0 corresponds to r3,
3192 // * gpr=1 to r4, etc.
3194 // char fpr; /* index into the array of 8 FPRs
3195 // * stored in the register save area
3196 // * fpr=0 corresponds to f1,
3197 // * fpr=1 to f2, etc.
3199 // char *overflow_arg_area;
3200 // /* location on stack that holds
3201 // * the next overflow argument
3203 // char *reg_save_area;
3204 // /* where r3:r10 and f1:f8 (if saved)
3209 SDValue ArgGPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumGPR(), dl
, MVT::i32
);
3210 SDValue ArgFPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumFPR(), dl
, MVT::i32
);
3211 SDValue StackOffsetFI
= DAG
.getFrameIndex(FuncInfo
->getVarArgsStackOffset(),
3213 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(),
3216 uint64_t FrameOffset
= PtrVT
.getSizeInBits()/8;
3217 SDValue ConstFrameOffset
= DAG
.getConstant(FrameOffset
, dl
, PtrVT
);
3219 uint64_t StackOffset
= PtrVT
.getSizeInBits()/8 - 1;
3220 SDValue ConstStackOffset
= DAG
.getConstant(StackOffset
, dl
, PtrVT
);
3222 uint64_t FPROffset
= 1;
3223 SDValue ConstFPROffset
= DAG
.getConstant(FPROffset
, dl
, PtrVT
);
3225 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
3227 // Store first byte : number of int regs
3228 SDValue firstStore
=
3229 DAG
.getTruncStore(Op
.getOperand(0), dl
, ArgGPR
, Op
.getOperand(1),
3230 MachinePointerInfo(SV
), MVT::i8
);
3231 uint64_t nextOffset
= FPROffset
;
3232 SDValue nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Op
.getOperand(1),
3235 // Store second byte : number of float regs
3236 SDValue secondStore
=
3237 DAG
.getTruncStore(firstStore
, dl
, ArgFPR
, nextPtr
,
3238 MachinePointerInfo(SV
, nextOffset
), MVT::i8
);
3239 nextOffset
+= StackOffset
;
3240 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstStackOffset
);
3242 // Store second word : arguments given on stack
3243 SDValue thirdStore
= DAG
.getStore(secondStore
, dl
, StackOffsetFI
, nextPtr
,
3244 MachinePointerInfo(SV
, nextOffset
));
3245 nextOffset
+= FrameOffset
;
3246 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstFrameOffset
);
3248 // Store third word : arguments given in registers
3249 return DAG
.getStore(thirdStore
, dl
, FR
, nextPtr
,
3250 MachinePointerInfo(SV
, nextOffset
));
3253 /// FPR - The set of FP registers that should be allocated for arguments
3254 /// on Darwin and AIX.
3255 static const MCPhysReg FPR
[] = {PPC::F1
, PPC::F2
, PPC::F3
, PPC::F4
, PPC::F5
,
3256 PPC::F6
, PPC::F7
, PPC::F8
, PPC::F9
, PPC::F10
,
3257 PPC::F11
, PPC::F12
, PPC::F13
};
3259 /// QFPR - The set of QPX registers that should be allocated for arguments.
3260 static const MCPhysReg QFPR
[] = {
3261 PPC::QF1
, PPC::QF2
, PPC::QF3
, PPC::QF4
, PPC::QF5
, PPC::QF6
, PPC::QF7
,
3262 PPC::QF8
, PPC::QF9
, PPC::QF10
, PPC::QF11
, PPC::QF12
, PPC::QF13
};
3264 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3266 static unsigned CalculateStackSlotSize(EVT ArgVT
, ISD::ArgFlagsTy Flags
,
3267 unsigned PtrByteSize
) {
3268 unsigned ArgSize
= ArgVT
.getStoreSize();
3269 if (Flags
.isByVal())
3270 ArgSize
= Flags
.getByValSize();
3272 // Round up to multiples of the pointer size, except for array members,
3273 // which are always packed.
3274 if (!Flags
.isInConsecutiveRegs())
3275 ArgSize
= ((ArgSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3280 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3282 static unsigned CalculateStackSlotAlignment(EVT ArgVT
, EVT OrigVT
,
3283 ISD::ArgFlagsTy Flags
,
3284 unsigned PtrByteSize
) {
3285 unsigned Align
= PtrByteSize
;
3287 // Altivec parameters are padded to a 16 byte boundary.
3288 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
3289 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
3290 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
||
3291 ArgVT
== MVT::v1i128
|| ArgVT
== MVT::f128
)
3293 // QPX vector types stored in double-precision are padded to a 32 byte
3295 else if (ArgVT
== MVT::v4f64
|| ArgVT
== MVT::v4i1
)
3298 // ByVal parameters are aligned as requested.
3299 if (Flags
.isByVal()) {
3300 unsigned BVAlign
= Flags
.getByValAlign();
3301 if (BVAlign
> PtrByteSize
) {
3302 if (BVAlign
% PtrByteSize
!= 0)
3304 "ByVal alignment is not a multiple of the pointer size");
3310 // Array members are always packed to their original alignment.
3311 if (Flags
.isInConsecutiveRegs()) {
3312 // If the array member was split into multiple registers, the first
3313 // needs to be aligned to the size of the full type. (Except for
3314 // ppcf128, which is only aligned as its f64 components.)
3315 if (Flags
.isSplit() && OrigVT
!= MVT::ppcf128
)
3316 Align
= OrigVT
.getStoreSize();
3318 Align
= ArgVT
.getStoreSize();
3324 /// CalculateStackSlotUsed - Return whether this argument will use its
3325 /// stack slot (instead of being passed in registers). ArgOffset,
3326 /// AvailableFPRs, and AvailableVRs must hold the current argument
3327 /// position, and will be updated to account for this argument.
3328 static bool CalculateStackSlotUsed(EVT ArgVT
, EVT OrigVT
,
3329 ISD::ArgFlagsTy Flags
,
3330 unsigned PtrByteSize
,
3331 unsigned LinkageSize
,
3332 unsigned ParamAreaSize
,
3333 unsigned &ArgOffset
,
3334 unsigned &AvailableFPRs
,
3335 unsigned &AvailableVRs
, bool HasQPX
) {
3336 bool UseMemory
= false;
3338 // Respect alignment of argument on the stack.
3340 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
3341 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
3342 // If there's no space left in the argument save area, we must
3343 // use memory (this check also catches zero-sized arguments).
3344 if (ArgOffset
>= LinkageSize
+ ParamAreaSize
)
3347 // Allocate argument on the stack.
3348 ArgOffset
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
3349 if (Flags
.isInConsecutiveRegsLast())
3350 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3351 // If we overran the argument save area, we must use memory
3352 // (this check catches arguments passed partially in memory)
3353 if (ArgOffset
> LinkageSize
+ ParamAreaSize
)
3356 // However, if the argument is actually passed in an FPR or a VR,
3357 // we don't use memory after all.
3358 if (!Flags
.isByVal()) {
3359 if (ArgVT
== MVT::f32
|| ArgVT
== MVT::f64
||
3360 // QPX registers overlap with the scalar FP registers.
3361 (HasQPX
&& (ArgVT
== MVT::v4f32
||
3362 ArgVT
== MVT::v4f64
||
3363 ArgVT
== MVT::v4i1
)))
3364 if (AvailableFPRs
> 0) {
3368 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
3369 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
3370 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
||
3371 ArgVT
== MVT::v1i128
|| ArgVT
== MVT::f128
)
3372 if (AvailableVRs
> 0) {
3381 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3382 /// ensure minimum alignment required for target.
3383 static unsigned EnsureStackAlignment(const PPCFrameLowering
*Lowering
,
3384 unsigned NumBytes
) {
3385 unsigned TargetAlign
= Lowering
->getStackAlignment();
3386 unsigned AlignMask
= TargetAlign
- 1;
3387 NumBytes
= (NumBytes
+ AlignMask
) & ~AlignMask
;
3391 SDValue
PPCTargetLowering::LowerFormalArguments(
3392 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3393 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3394 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3395 if (Subtarget
.is64BitELFABI())
3396 return LowerFormalArguments_64SVR4(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3398 else if (Subtarget
.is32BitELFABI())
3399 return LowerFormalArguments_32SVR4(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3402 // FIXME: We are using this for both AIX and Darwin. We should add appropriate
3403 // AIX testing, and rename it appropriately.
3404 return LowerFormalArguments_Darwin(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3408 SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
3409 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3410 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3411 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3413 // 32-bit SVR4 ABI Stack Frame Layout:
3414 // +-----------------------------------+
3415 // +--> | Back chain |
3416 // | +-----------------------------------+
3417 // | | Floating-point register save area |
3418 // | +-----------------------------------+
3419 // | | General register save area |
3420 // | +-----------------------------------+
3421 // | | CR save word |
3422 // | +-----------------------------------+
3423 // | | VRSAVE save word |
3424 // | +-----------------------------------+
3425 // | | Alignment padding |
3426 // | +-----------------------------------+
3427 // | | Vector register save area |
3428 // | +-----------------------------------+
3429 // | | Local variable space |
3430 // | +-----------------------------------+
3431 // | | Parameter list area |
3432 // | +-----------------------------------+
3433 // | | LR save word |
3434 // | +-----------------------------------+
3435 // SP--> +--- | Back chain |
3436 // +-----------------------------------+
3439 // System V Application Binary Interface PowerPC Processor Supplement
3440 // AltiVec Technology Programming Interface Manual
3442 MachineFunction
&MF
= DAG
.getMachineFunction();
3443 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
3444 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3446 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3447 // Potential tail calls could cause overwriting of argument stack slots.
3448 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
3449 (CallConv
== CallingConv::Fast
));
3450 unsigned PtrByteSize
= 4;
3452 // Assign locations to all of the incoming arguments.
3453 SmallVector
<CCValAssign
, 16> ArgLocs
;
3454 PPCCCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), ArgLocs
,
3457 // Reserve space for the linkage area on the stack.
3458 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
3459 CCInfo
.AllocateStack(LinkageSize
, PtrByteSize
);
3461 CCInfo
.PreAnalyzeFormalArguments(Ins
);
3463 CCInfo
.AnalyzeFormalArguments(Ins
, CC_PPC32_SVR4
);
3464 CCInfo
.clearWasPPCF128();
3466 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
3467 CCValAssign
&VA
= ArgLocs
[i
];
3469 // Arguments stored in registers.
3470 if (VA
.isRegLoc()) {
3471 const TargetRegisterClass
*RC
;
3472 EVT ValVT
= VA
.getValVT();
3474 switch (ValVT
.getSimpleVT().SimpleTy
) {
3476 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3479 RC
= &PPC::GPRCRegClass
;
3482 if (Subtarget
.hasP8Vector())
3483 RC
= &PPC::VSSRCRegClass
;
3484 else if (Subtarget
.hasSPE())
3485 RC
= &PPC::SPE4RCRegClass
;
3487 RC
= &PPC::F4RCRegClass
;
3490 if (Subtarget
.hasVSX())
3491 RC
= &PPC::VSFRCRegClass
;
3492 else if (Subtarget
.hasSPE())
3493 // SPE passes doubles in GPR pairs.
3494 RC
= &PPC::GPRCRegClass
;
3496 RC
= &PPC::F8RCRegClass
;
3501 RC
= &PPC::VRRCRegClass
;
3504 RC
= Subtarget
.hasQPX() ? &PPC::QSRCRegClass
: &PPC::VRRCRegClass
;
3508 RC
= &PPC::VRRCRegClass
;
3511 RC
= &PPC::QFRCRegClass
;
3514 RC
= &PPC::QBRCRegClass
;
3519 // Transform the arguments stored in physical registers into
3521 if (VA
.getLocVT() == MVT::f64
&& Subtarget
.hasSPE()) {
3522 assert(i
+ 1 < e
&& "No second half of double precision argument");
3523 unsigned RegLo
= MF
.addLiveIn(VA
.getLocReg(), RC
);
3524 unsigned RegHi
= MF
.addLiveIn(ArgLocs
[++i
].getLocReg(), RC
);
3525 SDValue ArgValueLo
= DAG
.getCopyFromReg(Chain
, dl
, RegLo
, MVT::i32
);
3526 SDValue ArgValueHi
= DAG
.getCopyFromReg(Chain
, dl
, RegHi
, MVT::i32
);
3527 if (!Subtarget
.isLittleEndian())
3528 std::swap (ArgValueLo
, ArgValueHi
);
3529 ArgValue
= DAG
.getNode(PPCISD::BUILD_SPE64
, dl
, MVT::f64
, ArgValueLo
,
3532 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
3533 ArgValue
= DAG
.getCopyFromReg(Chain
, dl
, Reg
,
3534 ValVT
== MVT::i1
? MVT::i32
: ValVT
);
3535 if (ValVT
== MVT::i1
)
3536 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, ArgValue
);
3539 InVals
.push_back(ArgValue
);
3541 // Argument stored in memory.
3542 assert(VA
.isMemLoc());
3544 // Get the extended size of the argument type in stack
3545 unsigned ArgSize
= VA
.getLocVT().getStoreSize();
3546 // Get the actual size of the argument type
3547 unsigned ObjSize
= VA
.getValVT().getStoreSize();
3548 unsigned ArgOffset
= VA
.getLocMemOffset();
3549 // Stack objects in PPC32 are right justified.
3550 ArgOffset
+= ArgSize
- ObjSize
;
3551 int FI
= MFI
.CreateFixedObject(ArgSize
, ArgOffset
, isImmutable
);
3553 // Create load nodes to retrieve arguments from the stack.
3554 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3556 DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
, MachinePointerInfo()));
3560 // Assign locations to all of the incoming aggregate by value arguments.
3561 // Aggregates passed by value are stored in the local variable space of the
3562 // caller's stack frame, right above the parameter list area.
3563 SmallVector
<CCValAssign
, 16> ByValArgLocs
;
3564 CCState
CCByValInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
3565 ByValArgLocs
, *DAG
.getContext());
3567 // Reserve stack space for the allocations in CCInfo.
3568 CCByValInfo
.AllocateStack(CCInfo
.getNextStackOffset(), PtrByteSize
);
3570 CCByValInfo
.AnalyzeFormalArguments(Ins
, CC_PPC32_SVR4_ByVal
);
3572 // Area that is at least reserved in the caller of this function.
3573 unsigned MinReservedArea
= CCByValInfo
.getNextStackOffset();
3574 MinReservedArea
= std::max(MinReservedArea
, LinkageSize
);
3576 // Set the size that is at least reserved in caller of this function. Tail
3577 // call optimized function's reserved stack space needs to be aligned so that
3578 // taking the difference between two stack areas will result in an aligned
3581 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
3582 FuncInfo
->setMinReservedArea(MinReservedArea
);
3584 SmallVector
<SDValue
, 8> MemOps
;
3586 // If the function takes variable number of arguments, make a frame index for
3587 // the start of the first vararg value... for expansion of llvm.va_start.
3589 static const MCPhysReg GPArgRegs
[] = {
3590 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
3591 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
3593 const unsigned NumGPArgRegs
= array_lengthof(GPArgRegs
);
3595 static const MCPhysReg FPArgRegs
[] = {
3596 PPC::F1
, PPC::F2
, PPC::F3
, PPC::F4
, PPC::F5
, PPC::F6
, PPC::F7
,
3599 unsigned NumFPArgRegs
= array_lengthof(FPArgRegs
);
3601 if (useSoftFloat() || hasSPE())
3604 FuncInfo
->setVarArgsNumGPR(CCInfo
.getFirstUnallocated(GPArgRegs
));
3605 FuncInfo
->setVarArgsNumFPR(CCInfo
.getFirstUnallocated(FPArgRegs
));
3607 // Make room for NumGPArgRegs and NumFPArgRegs.
3608 int Depth
= NumGPArgRegs
* PtrVT
.getSizeInBits()/8 +
3609 NumFPArgRegs
* MVT(MVT::f64
).getSizeInBits()/8;
3611 FuncInfo
->setVarArgsStackOffset(
3612 MFI
.CreateFixedObject(PtrVT
.getSizeInBits()/8,
3613 CCInfo
.getNextStackOffset(), true));
3615 FuncInfo
->setVarArgsFrameIndex(MFI
.CreateStackObject(Depth
, 8, false));
3616 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
3618 // The fixed integer arguments of a variadic function are stored to the
3619 // VarArgsFrameIndex on the stack so that they may be loaded by
3620 // dereferencing the result of va_next.
3621 for (unsigned GPRIndex
= 0; GPRIndex
!= NumGPArgRegs
; ++GPRIndex
) {
3622 // Get an existing live-in vreg, or add a new one.
3623 unsigned VReg
= MF
.getRegInfo().getLiveInVirtReg(GPArgRegs
[GPRIndex
]);
3625 VReg
= MF
.addLiveIn(GPArgRegs
[GPRIndex
], &PPC::GPRCRegClass
);
3627 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3629 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
3630 MemOps
.push_back(Store
);
3631 // Increment the address by four for the next argument to store
3632 SDValue PtrOff
= DAG
.getConstant(PtrVT
.getSizeInBits()/8, dl
, PtrVT
);
3633 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
3636 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3638 // The double arguments are stored to the VarArgsFrameIndex
3640 for (unsigned FPRIndex
= 0; FPRIndex
!= NumFPArgRegs
; ++FPRIndex
) {
3641 // Get an existing live-in vreg, or add a new one.
3642 unsigned VReg
= MF
.getRegInfo().getLiveInVirtReg(FPArgRegs
[FPRIndex
]);
3644 VReg
= MF
.addLiveIn(FPArgRegs
[FPRIndex
], &PPC::F8RCRegClass
);
3646 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::f64
);
3648 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
3649 MemOps
.push_back(Store
);
3650 // Increment the address by eight for the next argument to store
3651 SDValue PtrOff
= DAG
.getConstant(MVT(MVT::f64
).getSizeInBits()/8, dl
,
3653 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
3657 if (!MemOps
.empty())
3658 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
3663 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3664 // value to MVT::i64 and then truncate to the correct register size.
3665 SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags
,
3666 EVT ObjectVT
, SelectionDAG
&DAG
,
3668 const SDLoc
&dl
) const {
3670 ArgVal
= DAG
.getNode(ISD::AssertSext
, dl
, MVT::i64
, ArgVal
,
3671 DAG
.getValueType(ObjectVT
));
3672 else if (Flags
.isZExt())
3673 ArgVal
= DAG
.getNode(ISD::AssertZext
, dl
, MVT::i64
, ArgVal
,
3674 DAG
.getValueType(ObjectVT
));
3676 return DAG
.getNode(ISD::TRUNCATE
, dl
, ObjectVT
, ArgVal
);
3679 SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
3680 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3681 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3682 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3683 // TODO: add description of PPC stack frame format, or at least some docs.
3685 bool isELFv2ABI
= Subtarget
.isELFv2ABI();
3686 bool isLittleEndian
= Subtarget
.isLittleEndian();
3687 MachineFunction
&MF
= DAG
.getMachineFunction();
3688 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
3689 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3691 assert(!(CallConv
== CallingConv::Fast
&& isVarArg
) &&
3692 "fastcc not supported on varargs functions");
3694 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3695 // Potential tail calls could cause overwriting of argument stack slots.
3696 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
3697 (CallConv
== CallingConv::Fast
));
3698 unsigned PtrByteSize
= 8;
3699 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
3701 static const MCPhysReg GPR
[] = {
3702 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
3703 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
3705 static const MCPhysReg VR
[] = {
3706 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
3707 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
3710 const unsigned Num_GPR_Regs
= array_lengthof(GPR
);
3711 const unsigned Num_FPR_Regs
= useSoftFloat() ? 0 : 13;
3712 const unsigned Num_VR_Regs
= array_lengthof(VR
);
3713 const unsigned Num_QFPR_Regs
= Num_FPR_Regs
;
3715 // Do a first pass over the arguments to determine whether the ABI
3716 // guarantees that our caller has allocated the parameter save area
3717 // on its stack frame. In the ELFv1 ABI, this is always the case;
3718 // in the ELFv2 ABI, it is true if this is a vararg function or if
3719 // any parameter is located in a stack slot.
3721 bool HasParameterArea
= !isELFv2ABI
|| isVarArg
;
3722 unsigned ParamAreaSize
= Num_GPR_Regs
* PtrByteSize
;
3723 unsigned NumBytes
= LinkageSize
;
3724 unsigned AvailableFPRs
= Num_FPR_Regs
;
3725 unsigned AvailableVRs
= Num_VR_Regs
;
3726 for (unsigned i
= 0, e
= Ins
.size(); i
!= e
; ++i
) {
3727 if (Ins
[i
].Flags
.isNest())
3730 if (CalculateStackSlotUsed(Ins
[i
].VT
, Ins
[i
].ArgVT
, Ins
[i
].Flags
,
3731 PtrByteSize
, LinkageSize
, ParamAreaSize
,
3732 NumBytes
, AvailableFPRs
, AvailableVRs
,
3733 Subtarget
.hasQPX()))
3734 HasParameterArea
= true;
3737 // Add DAG nodes to load the arguments or copy them out of registers. On
3738 // entry to a function on PPC, the arguments start after the linkage area,
3739 // although the first ones are often in registers.
3741 unsigned ArgOffset
= LinkageSize
;
3742 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
3743 unsigned &QFPR_idx
= FPR_idx
;
3744 SmallVector
<SDValue
, 8> MemOps
;
3745 Function::const_arg_iterator FuncArg
= MF
.getFunction().arg_begin();
3746 unsigned CurArgIdx
= 0;
3747 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
; ++ArgNo
) {
3749 bool needsLoad
= false;
3750 EVT ObjectVT
= Ins
[ArgNo
].VT
;
3751 EVT OrigVT
= Ins
[ArgNo
].ArgVT
;
3752 unsigned ObjSize
= ObjectVT
.getStoreSize();
3753 unsigned ArgSize
= ObjSize
;
3754 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
3755 if (Ins
[ArgNo
].isOrigArg()) {
3756 std::advance(FuncArg
, Ins
[ArgNo
].getOrigArgIndex() - CurArgIdx
);
3757 CurArgIdx
= Ins
[ArgNo
].getOrigArgIndex();
3759 // We re-align the argument offset for each argument, except when using the
3760 // fast calling convention, when we need to make sure we do that only when
3761 // we'll actually use a stack slot.
3762 unsigned CurArgOffset
, Align
;
3763 auto ComputeArgOffset
= [&]() {
3764 /* Respect alignment of argument on the stack. */
3765 Align
= CalculateStackSlotAlignment(ObjectVT
, OrigVT
, Flags
, PtrByteSize
);
3766 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
3767 CurArgOffset
= ArgOffset
;
3770 if (CallConv
!= CallingConv::Fast
) {
3773 /* Compute GPR index associated with argument offset. */
3774 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
3775 GPR_idx
= std::min(GPR_idx
, Num_GPR_Regs
);
3778 // FIXME the codegen can be much improved in some cases.
3779 // We do not have to keep everything in memory.
3780 if (Flags
.isByVal()) {
3781 assert(Ins
[ArgNo
].isOrigArg() && "Byval arguments cannot be implicit");
3783 if (CallConv
== CallingConv::Fast
)
3786 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3787 ObjSize
= Flags
.getByValSize();
3788 ArgSize
= ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3789 // Empty aggregate parameters do not take up registers. Examples:
3793 // etc. However, we have to provide a place-holder in InVals, so
3794 // pretend we have an 8-byte item at the current address for that
3797 int FI
= MFI
.CreateFixedObject(PtrByteSize
, ArgOffset
, true);
3798 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3799 InVals
.push_back(FIN
);
3803 // Create a stack object covering all stack doublewords occupied
3804 // by the argument. If the argument is (fully or partially) on
3805 // the stack, or if the argument is fully in registers but the
3806 // caller has allocated the parameter save anyway, we can refer
3807 // directly to the caller's stack frame. Otherwise, create a
3808 // local copy in our own frame.
3810 if (HasParameterArea
||
3811 ArgSize
+ ArgOffset
> LinkageSize
+ Num_GPR_Regs
* PtrByteSize
)
3812 FI
= MFI
.CreateFixedObject(ArgSize
, ArgOffset
, false, true);
3814 FI
= MFI
.CreateStackObject(ArgSize
, Align
, false);
3815 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3817 // Handle aggregates smaller than 8 bytes.
3818 if (ObjSize
< PtrByteSize
) {
3819 // The value of the object is its address, which differs from the
3820 // address of the enclosing doubleword on big-endian systems.
3822 if (!isLittleEndian
) {
3823 SDValue ArgOff
= DAG
.getConstant(PtrByteSize
- ObjSize
, dl
, PtrVT
);
3824 Arg
= DAG
.getNode(ISD::ADD
, dl
, ArgOff
.getValueType(), Arg
, ArgOff
);
3826 InVals
.push_back(Arg
);
3828 if (GPR_idx
!= Num_GPR_Regs
) {
3829 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3830 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3831 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3834 if (ObjSize
==1 || ObjSize
==2 || ObjSize
==4) {
3835 EVT ObjType
= (ObjSize
== 1 ? MVT::i8
:
3836 (ObjSize
== 2 ? MVT::i16
: MVT::i32
));
3837 Store
= DAG
.getTruncStore(Val
.getValue(1), dl
, Val
, Arg
,
3838 MachinePointerInfo(&*FuncArg
), ObjType
);
3840 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3841 // store the whole register as-is to the parameter save area
3843 Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
,
3844 MachinePointerInfo(&*FuncArg
));
3847 MemOps
.push_back(Store
);
3849 // Whether we copied from a register or not, advance the offset
3850 // into the parameter save area by a full doubleword.
3851 ArgOffset
+= PtrByteSize
;
3855 // The value of the object is its address, which is the address of
3856 // its first stack doubleword.
3857 InVals
.push_back(FIN
);
3859 // Store whatever pieces of the object are in registers to memory.
3860 for (unsigned j
= 0; j
< ArgSize
; j
+= PtrByteSize
) {
3861 if (GPR_idx
== Num_GPR_Regs
)
3864 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
3865 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3866 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3869 SDValue Off
= DAG
.getConstant(j
, dl
, PtrVT
);
3870 Addr
= DAG
.getNode(ISD::ADD
, dl
, Off
.getValueType(), Addr
, Off
);
3872 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, Addr
,
3873 MachinePointerInfo(&*FuncArg
, j
));
3874 MemOps
.push_back(Store
);
3877 ArgOffset
+= ArgSize
;
3881 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
3882 default: llvm_unreachable("Unhandled argument type!");
3886 if (Flags
.isNest()) {
3887 // The 'nest' parameter, if any, is passed in R11.
3888 unsigned VReg
= MF
.addLiveIn(PPC::X11
, &PPC::G8RCRegClass
);
3889 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3891 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
3892 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
3897 // These can be scalar arguments or elements of an integer array type
3898 // passed directly. Clang may use those instead of "byval" aggregate
3899 // types to avoid forcing arguments to memory unnecessarily.
3900 if (GPR_idx
!= Num_GPR_Regs
) {
3901 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3902 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3903 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3905 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
3906 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3907 // value to MVT::i64 and then truncate to the correct register size.
3908 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
3910 if (CallConv
== CallingConv::Fast
)
3914 ArgSize
= PtrByteSize
;
3916 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
3922 // These can be scalar arguments or elements of a float array type
3923 // passed directly. The latter are used to implement ELFv2 homogenous
3924 // float aggregates.
3925 if (FPR_idx
!= Num_FPR_Regs
) {
3928 if (ObjectVT
== MVT::f32
)
3929 VReg
= MF
.addLiveIn(FPR
[FPR_idx
],
3930 Subtarget
.hasP8Vector()
3931 ? &PPC::VSSRCRegClass
3932 : &PPC::F4RCRegClass
);
3934 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], Subtarget
.hasVSX()
3935 ? &PPC::VSFRCRegClass
3936 : &PPC::F8RCRegClass
);
3938 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
3940 } else if (GPR_idx
!= Num_GPR_Regs
&& CallConv
!= CallingConv::Fast
) {
3941 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3942 // once we support fp <-> gpr moves.
3944 // This can only ever happen in the presence of f32 array types,
3945 // since otherwise we never run out of FPRs before running out
3947 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3948 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3949 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3951 if (ObjectVT
== MVT::f32
) {
3952 if ((ArgOffset
% PtrByteSize
) == (isLittleEndian
? 4 : 0))
3953 ArgVal
= DAG
.getNode(ISD::SRL
, dl
, MVT::i64
, ArgVal
,
3954 DAG
.getConstant(32, dl
, MVT::i32
));
3955 ArgVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, ArgVal
);
3958 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, ObjectVT
, ArgVal
);
3960 if (CallConv
== CallingConv::Fast
)
3966 // When passing an array of floats, the array occupies consecutive
3967 // space in the argument area; only round up to the next doubleword
3968 // at the end of the array. Otherwise, each float takes 8 bytes.
3969 if (CallConv
!= CallingConv::Fast
|| needsLoad
) {
3970 ArgSize
= Flags
.isInConsecutiveRegs() ? ObjSize
: PtrByteSize
;
3971 ArgOffset
+= ArgSize
;
3972 if (Flags
.isInConsecutiveRegsLast())
3973 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3984 if (!Subtarget
.hasQPX()) {
3985 // These can be scalar arguments or elements of a vector array type
3986 // passed directly. The latter are used to implement ELFv2 homogenous
3987 // vector aggregates.
3988 if (VR_idx
!= Num_VR_Regs
) {
3989 unsigned VReg
= MF
.addLiveIn(VR
[VR_idx
], &PPC::VRRCRegClass
);
3990 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
3993 if (CallConv
== CallingConv::Fast
)
3997 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
4002 assert(ObjectVT
.getSimpleVT().SimpleTy
== MVT::v4f32
&&
4003 "Invalid QPX parameter type");
4008 // QPX vectors are treated like their scalar floating-point subregisters
4009 // (except that they're larger).
4010 unsigned Sz
= ObjectVT
.getSimpleVT().SimpleTy
== MVT::v4f32
? 16 : 32;
4011 if (QFPR_idx
!= Num_QFPR_Regs
) {
4012 const TargetRegisterClass
*RC
;
4013 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
4014 case MVT::v4f64
: RC
= &PPC::QFRCRegClass
; break;
4015 case MVT::v4f32
: RC
= &PPC::QSRCRegClass
; break;
4016 default: RC
= &PPC::QBRCRegClass
; break;
4019 unsigned VReg
= MF
.addLiveIn(QFPR
[QFPR_idx
], RC
);
4020 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4023 if (CallConv
== CallingConv::Fast
)
4027 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
4032 // We need to load the argument to a virtual register if we determined
4033 // above that we ran out of physical registers of the appropriate type.
4035 if (ObjSize
< ArgSize
&& !isLittleEndian
)
4036 CurArgOffset
+= ArgSize
- ObjSize
;
4037 int FI
= MFI
.CreateFixedObject(ObjSize
, CurArgOffset
, isImmutable
);
4038 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4039 ArgVal
= DAG
.getLoad(ObjectVT
, dl
, Chain
, FIN
, MachinePointerInfo());
4042 InVals
.push_back(ArgVal
);
4045 // Area that is at least reserved in the caller of this function.
4046 unsigned MinReservedArea
;
4047 if (HasParameterArea
)
4048 MinReservedArea
= std::max(ArgOffset
, LinkageSize
+ 8 * PtrByteSize
);
4050 MinReservedArea
= LinkageSize
;
4052 // Set the size that is at least reserved in caller of this function. Tail
4053 // call optimized functions' reserved stack space needs to be aligned so that
4054 // taking the difference between two stack areas will result in an aligned
4057 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
4058 FuncInfo
->setMinReservedArea(MinReservedArea
);
4060 // If the function takes variable number of arguments, make a frame index for
4061 // the start of the first vararg value... for expansion of llvm.va_start.
4063 int Depth
= ArgOffset
;
4065 FuncInfo
->setVarArgsFrameIndex(
4066 MFI
.CreateFixedObject(PtrByteSize
, Depth
, true));
4067 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
4069 // If this function is vararg, store any remaining integer argument regs
4070 // to their spots on the stack so that they may be loaded by dereferencing
4071 // the result of va_next.
4072 for (GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
4073 GPR_idx
< Num_GPR_Regs
; ++GPR_idx
) {
4074 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4075 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4077 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
4078 MemOps
.push_back(Store
);
4079 // Increment the address by four for the next argument to store
4080 SDValue PtrOff
= DAG
.getConstant(PtrByteSize
, dl
, PtrVT
);
4081 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
4085 if (!MemOps
.empty())
4086 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
4091 SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
4092 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
4093 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
4094 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
4095 // TODO: add description of PPC stack frame format, or at least some docs.
4097 MachineFunction
&MF
= DAG
.getMachineFunction();
4098 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
4099 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
4101 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
4102 bool isPPC64
= PtrVT
== MVT::i64
;
4103 // Potential tail calls could cause overwriting of argument stack slots.
4104 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
4105 (CallConv
== CallingConv::Fast
));
4106 unsigned PtrByteSize
= isPPC64
? 8 : 4;
4107 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
4108 unsigned ArgOffset
= LinkageSize
;
4109 // Area that is at least reserved in caller of this function.
4110 unsigned MinReservedArea
= ArgOffset
;
4112 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
4113 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
4114 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
4116 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
4117 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
4118 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
4120 static const MCPhysReg VR
[] = {
4121 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
4122 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
4125 const unsigned Num_GPR_Regs
= array_lengthof(GPR_32
);
4126 const unsigned Num_FPR_Regs
= useSoftFloat() ? 0 : 13;
4127 const unsigned Num_VR_Regs
= array_lengthof( VR
);
4129 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
4131 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
4133 // In 32-bit non-varargs functions, the stack space for vectors is after the
4134 // stack space for non-vectors. We do not use this space unless we have
4135 // too many vectors to fit in registers, something that only occurs in
4136 // constructed examples:), but we have to walk the arglist to figure
4137 // that out...for the pathological case, compute VecArgOffset as the
4138 // start of the vector parameter area. Computing VecArgOffset is the
4139 // entire point of the following loop.
4140 unsigned VecArgOffset
= ArgOffset
;
4141 if (!isVarArg
&& !isPPC64
) {
4142 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
;
4144 EVT ObjectVT
= Ins
[ArgNo
].VT
;
4145 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
4147 if (Flags
.isByVal()) {
4148 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4149 unsigned ObjSize
= Flags
.getByValSize();
4151 ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
4152 VecArgOffset
+= ArgSize
;
4156 switch(ObjectVT
.getSimpleVT().SimpleTy
) {
4157 default: llvm_unreachable("Unhandled argument type!");
4163 case MVT::i64
: // PPC64
4165 // FIXME: We are guaranteed to be !isPPC64 at this point.
4166 // Does MVT::i64 apply?
4173 // Nothing to do, we're only looking at Nonvector args here.
4178 // We've found where the vector parameter area in memory is. Skip the
4179 // first 12 parameters; these don't use that memory.
4180 VecArgOffset
= ((VecArgOffset
+15)/16)*16;
4181 VecArgOffset
+= 12*16;
4183 // Add DAG nodes to load the arguments or copy them out of registers. On
4184 // entry to a function on PPC, the arguments start after the linkage area,
4185 // although the first ones are often in registers.
4187 SmallVector
<SDValue
, 8> MemOps
;
4188 unsigned nAltivecParamsAtEnd
= 0;
4189 Function::const_arg_iterator FuncArg
= MF
.getFunction().arg_begin();
4190 unsigned CurArgIdx
= 0;
4191 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
; ++ArgNo
) {
4193 bool needsLoad
= false;
4194 EVT ObjectVT
= Ins
[ArgNo
].VT
;
4195 unsigned ObjSize
= ObjectVT
.getSizeInBits()/8;
4196 unsigned ArgSize
= ObjSize
;
4197 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
4198 if (Ins
[ArgNo
].isOrigArg()) {
4199 std::advance(FuncArg
, Ins
[ArgNo
].getOrigArgIndex() - CurArgIdx
);
4200 CurArgIdx
= Ins
[ArgNo
].getOrigArgIndex();
4202 unsigned CurArgOffset
= ArgOffset
;
4204 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4205 if (ObjectVT
==MVT::v4f32
|| ObjectVT
==MVT::v4i32
||
4206 ObjectVT
==MVT::v8i16
|| ObjectVT
==MVT::v16i8
) {
4207 if (isVarArg
|| isPPC64
) {
4208 MinReservedArea
= ((MinReservedArea
+15)/16)*16;
4209 MinReservedArea
+= CalculateStackSlotSize(ObjectVT
,
4212 } else nAltivecParamsAtEnd
++;
4214 // Calculate min reserved area.
4215 MinReservedArea
+= CalculateStackSlotSize(Ins
[ArgNo
].VT
,
4219 // FIXME the codegen can be much improved in some cases.
4220 // We do not have to keep everything in memory.
4221 if (Flags
.isByVal()) {
4222 assert(Ins
[ArgNo
].isOrigArg() && "Byval arguments cannot be implicit");
4224 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4225 ObjSize
= Flags
.getByValSize();
4226 ArgSize
= ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
4227 // Objects of size 1 and 2 are right justified, everything else is
4228 // left justified. This means the memory address is adjusted forwards.
4229 if (ObjSize
==1 || ObjSize
==2) {
4230 CurArgOffset
= CurArgOffset
+ (4 - ObjSize
);
4232 // The value of the object is its address.
4233 int FI
= MFI
.CreateFixedObject(ObjSize
, CurArgOffset
, false, true);
4234 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4235 InVals
.push_back(FIN
);
4236 if (ObjSize
==1 || ObjSize
==2) {
4237 if (GPR_idx
!= Num_GPR_Regs
) {
4240 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4242 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4243 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4244 EVT ObjType
= ObjSize
== 1 ? MVT::i8
: MVT::i16
;
4246 DAG
.getTruncStore(Val
.getValue(1), dl
, Val
, FIN
,
4247 MachinePointerInfo(&*FuncArg
), ObjType
);
4248 MemOps
.push_back(Store
);
4252 ArgOffset
+= PtrByteSize
;
4256 for (unsigned j
= 0; j
< ArgSize
; j
+= PtrByteSize
) {
4257 // Store whatever pieces of the object are in registers
4258 // to memory. ArgOffset will be the address of the beginning
4260 if (GPR_idx
!= Num_GPR_Regs
) {
4263 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4265 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4266 int FI
= MFI
.CreateFixedObject(PtrByteSize
, ArgOffset
, true);
4267 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4268 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4269 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
,
4270 MachinePointerInfo(&*FuncArg
, j
));
4271 MemOps
.push_back(Store
);
4273 ArgOffset
+= PtrByteSize
;
4275 ArgOffset
+= ArgSize
- (ArgOffset
-CurArgOffset
);
4282 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
4283 default: llvm_unreachable("Unhandled argument type!");
4287 if (GPR_idx
!= Num_GPR_Regs
) {
4288 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4289 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i32
);
4291 if (ObjectVT
== MVT::i1
)
4292 ArgVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, ArgVal
);
4297 ArgSize
= PtrByteSize
;
4299 // All int arguments reserve stack space in the Darwin ABI.
4300 ArgOffset
+= PtrByteSize
;
4304 case MVT::i64
: // PPC64
4305 if (GPR_idx
!= Num_GPR_Regs
) {
4306 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4307 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
4309 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
4310 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4311 // value to MVT::i64 and then truncate to the correct register size.
4312 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
4317 ArgSize
= PtrByteSize
;
4319 // All int arguments reserve stack space in the Darwin ABI.
4325 // Every 4 bytes of argument space consumes one of the GPRs available for
4326 // argument passing.
4327 if (GPR_idx
!= Num_GPR_Regs
) {
4329 if (ObjSize
== 8 && GPR_idx
!= Num_GPR_Regs
&& !isPPC64
)
4332 if (FPR_idx
!= Num_FPR_Regs
) {
4335 if (ObjectVT
== MVT::f32
)
4336 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], &PPC::F4RCRegClass
);
4338 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], &PPC::F8RCRegClass
);
4340 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4346 // All FP arguments reserve stack space in the Darwin ABI.
4347 ArgOffset
+= isPPC64
? 8 : ObjSize
;
4353 // Note that vector arguments in registers don't reserve stack space,
4354 // except in varargs functions.
4355 if (VR_idx
!= Num_VR_Regs
) {
4356 unsigned VReg
= MF
.addLiveIn(VR
[VR_idx
], &PPC::VRRCRegClass
);
4357 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4359 while ((ArgOffset
% 16) != 0) {
4360 ArgOffset
+= PtrByteSize
;
4361 if (GPR_idx
!= Num_GPR_Regs
)
4365 GPR_idx
= std::min(GPR_idx
+4, Num_GPR_Regs
); // FIXME correct for ppc64?
4369 if (!isVarArg
&& !isPPC64
) {
4370 // Vectors go after all the nonvectors.
4371 CurArgOffset
= VecArgOffset
;
4374 // Vectors are aligned.
4375 ArgOffset
= ((ArgOffset
+15)/16)*16;
4376 CurArgOffset
= ArgOffset
;
4384 // We need to load the argument to a virtual register if we determined above
4385 // that we ran out of physical registers of the appropriate type.
4387 int FI
= MFI
.CreateFixedObject(ObjSize
,
4388 CurArgOffset
+ (ArgSize
- ObjSize
),
4390 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4391 ArgVal
= DAG
.getLoad(ObjectVT
, dl
, Chain
, FIN
, MachinePointerInfo());
4394 InVals
.push_back(ArgVal
);
4397 // Allow for Altivec parameters at the end, if needed.
4398 if (nAltivecParamsAtEnd
) {
4399 MinReservedArea
= ((MinReservedArea
+15)/16)*16;
4400 MinReservedArea
+= 16*nAltivecParamsAtEnd
;
4403 // Area that is at least reserved in the caller of this function.
4404 MinReservedArea
= std::max(MinReservedArea
, LinkageSize
+ 8 * PtrByteSize
);
4406 // Set the size that is at least reserved in caller of this function. Tail
4407 // call optimized functions' reserved stack space needs to be aligned so that
4408 // taking the difference between two stack areas will result in an aligned
4411 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
4412 FuncInfo
->setMinReservedArea(MinReservedArea
);
4414 // If the function takes variable number of arguments, make a frame index for
4415 // the start of the first vararg value... for expansion of llvm.va_start.
4417 int Depth
= ArgOffset
;
4419 FuncInfo
->setVarArgsFrameIndex(
4420 MFI
.CreateFixedObject(PtrVT
.getSizeInBits()/8,
4422 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
4424 // If this function is vararg, store any remaining integer argument regs
4425 // to their spots on the stack so that they may be loaded by dereferencing
4426 // the result of va_next.
4427 for (; GPR_idx
!= Num_GPR_Regs
; ++GPR_idx
) {
4431 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4433 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4435 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4437 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
4438 MemOps
.push_back(Store
);
4439 // Increment the address by four for the next argument to store
4440 SDValue PtrOff
= DAG
.getConstant(PtrVT
.getSizeInBits()/8, dl
, PtrVT
);
4441 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
4445 if (!MemOps
.empty())
4446 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
4451 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4452 /// adjusted to accommodate the arguments for the tailcall.
/// For a non-tail call the amount is 0. Otherwise it is the caller's minimum
/// reserved area minus \p ParamSize, and it is also recorded in the
/// PPCFunctionInfo when it is below the delta stored there so far.
4453 static int CalculateTailCallSPDiff(SelectionDAG
& DAG
, bool isTailCall
,
4454 unsigned ParamSize
) {
4456 if (!isTailCall
) return 0;
4458 PPCFunctionInfo
*FI
= DAG
.getMachineFunction().getInfo
<PPCFunctionInfo
>();
4459 unsigned CallerMinReservedArea
= FI
->getMinReservedArea();
// Signed difference: negative when the callee needs more parameter space
// than the caller has reserved.
4460 int SPDiff
= (int)CallerMinReservedArea
- (int)ParamSize
;
4461 // Remember only if the new adjustment is bigger, i.e. SPDiff is smaller
// than the delta recorded so far.
4462 if (SPDiff
< FI
->getTailCallSPDelta())
4463 FI
->setTailCallSPDelta(SPDiff
);
// Forward declaration; the definition appears further down in this file.
4468 static bool isFunctionGlobalAddress(SDValue Callee
);
// Decide whether a call from \p Caller to \p Callee may be assumed to use the
// same TOC base. The checks below inspect the callee's global value: code
// model, DSO-locality, linker strength, COMDAT, section and section prefix.
4471 callsShareTOCBase(const Function
*Caller
, SDValue Callee
,
4472 const TargetMachine
&TM
) {
4473 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4474 // don't have enough information to determine if the caller and callee share
4475 // the same TOC base, so we have to pessimistically assume they don't for
4477 GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
);
4481 const GlobalValue
*GV
= G
->getGlobal();
4482 // The medium and large code models are expected to provide a sufficiently
4483 // large TOC to provide all data addressing needs of a module with a
4484 // single TOC. Since each module will be addressed with a single TOC then we
4485 // only need to check that caller and callee don't cross dso boundaries.
4486 if (CodeModel::Medium
== TM
.getCodeModel() ||
4487 CodeModel::Large
== TM
.getCodeModel())
4488 return TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
);
4490 // Otherwise we need to ensure callee and caller are in the same section,
4491 // since the linker may allocate multiple TOCs, and we don't know which
4492 // sections will belong to the same TOC base.
4494 if (!GV
->isStrongDefinitionForLinker())
4497 // Any explicitly-specified sections and section prefixes must also match.
4498 // Also, if we're using -ffunction-sections, then each function is always in
4499 // a different section (the same is true for COMDAT functions).
4500 if (TM
.getFunctionSections() || GV
->hasComdat() || Caller
->hasComdat() ||
4501 GV
->getSection() != Caller
->getSection())
// Section prefixes can only be compared when the callee global is a Function.
4503 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
4504 if (F
->getSectionPrefix() != Caller
->getSectionPrefix())
4508 // If the callee might be interposed, then we can't assume the ultimate call
4509 // target will be in the same section. Even in cases where we can assume that
4510 // interposition won't happen, in any case where the linker might insert a
4511 // stub to allow for interposition, we must generate code as though
4512 // interposition might occur. To understand why this matters, consider a
4513 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4514 // in the same section, but a is in a different module (i.e. has a different
4515 // TOC base pointer). If the linker allows for interposition between b and c,
4516 // then it will generate a stub for the call edge between b and c which will
4517 // save the TOC pointer into the designated stack slot allocated by b. If we
4518 // return true here, and therefore allow a tail call between b and c, that
4519 // stack slot won't exist and the b -> c stub will end up saving b's TOC base
4520 // pointer into the stack slot allocated by a (where the a -> b stub saved
4521 // a's TOC base pointer). If we're not considering a tail call, but rather,
4522 // whether a nop is needed after the call instruction in b, because the linker
4523 // will insert a stub, it might complain about a missing nop if we omit it
4524 // (although many don't complain in this case).
4525 if (!TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
))
// Walk the outgoing arguments in \p Outs and ask CalculateStackSlotUsed about
// each one, tracking the running byte offset and the floating-point and
// vector registers still available. Only valid for the 64-bit ELF ABI (see
// the assert below).
4532 needStackSlotPassParameters(const PPCSubtarget
&Subtarget
,
4533 const SmallVectorImpl
<ISD::OutputArg
> &Outs
) {
4534 assert(Subtarget
.is64BitELFABI());
4536 const unsigned PtrByteSize
= 8;
4537 const unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
// Argument registers: eight GPRs (X3-X10) and twelve vector registers
// (V2-V13). Only the array lengths are used below.
4539 static const MCPhysReg GPR
[] = {
4540 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
4541 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
4543 static const MCPhysReg VR
[] = {
4544 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
4545 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
4548 const unsigned NumGPRs
= array_lengthof(GPR
);
4549 const unsigned NumFPRs
= 13;
4550 const unsigned NumVRs
= array_lengthof(VR
);
// One pointer-sized slot per GPR argument register.
4551 const unsigned ParamAreaSize
= NumGPRs
* PtrByteSize
;
// Running state handed to CalculateStackSlotUsed: offsets start at the end
// of the linkage area, with all FPRs and VRs still free.
4553 unsigned NumBytes
= LinkageSize
;
4554 unsigned AvailableFPRs
= NumFPRs
;
4555 unsigned AvailableVRs
= NumVRs
;
4557 for (const ISD::OutputArg
& Param
: Outs
) {
// 'nest' arguments are not considered here.
4558 if (Param
.Flags
.isNest()) continue;
4560 if (CalculateStackSlotUsed(Param
.VT
, Param
.ArgVT
, Param
.Flags
,
4561 PtrByteSize
, LinkageSize
, ParamAreaSize
,
4562 NumBytes
, AvailableFPRs
, AvailableVRs
,
4563 Subtarget
.hasQPX()))
// Compare the arguments passed at call site \p CS with the formal arguments
// of \p CallerFn, position by position. The argument counts are compared
// first; a call-site argument matches when it is the caller's own formal
// argument, and an undef of the same type is treated specially (see below).
4570 hasSameArgumentList(const Function
*CallerFn
, ImmutableCallSite CS
) {
4571 if (CS
.arg_size() != CallerFn
->arg_size())
4574 ImmutableCallSite::arg_iterator CalleeArgIter
= CS
.arg_begin();
4575 ImmutableCallSite::arg_iterator CalleeArgEnd
= CS
.arg_end();
4576 Function::const_arg_iterator CallerArgIter
= CallerFn
->arg_begin();
// Both sequences have the same length here, so the iterators advance in
// lock step.
4578 for (; CalleeArgIter
!= CalleeArgEnd
; ++CalleeArgIter
, ++CallerArgIter
) {
4579 const Value
* CalleeArg
= *CalleeArgIter
;
4580 const Value
* CallerArg
= &(*CallerArgIter
);
4581 if (CalleeArg
== CallerArg
)
4584 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4585 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4587 // 1st argument of callee is undef and has the same type as caller.
4588 if (CalleeArg
->getType() == CallerArg
->getType() &&
4589 isa
<UndefValue
>(CalleeArg
))
4598 // Returns true if TCO is possible between the caller's and callee's
4599 // calling conventions.
4601 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC
,
4602 CallingConv::ID CalleeCC
) {
4603 // Tail calls are possible with fastcc and ccc.
4604 auto isTailCallableCC
= [] (CallingConv::ID CC
){
4605 return CC
== CallingConv::C
|| CC
== CallingConv::Fast
;
// Both sides must use one of the tail-callable conventions.
4607 if (!isTailCallableCC(CallerCC
) || !isTailCallableCC(CalleeCC
))
4610 // We can safely tail call both fastcc and ccc callees from a c calling
4611 // convention caller. If the caller is fastcc, we may have less stack space
4612 // than a non-fastcc caller with the same signature so disable tail-calls in
// Eligible when the caller is ccc, or when both conventions are identical.
4614 return CallerCC
== CallingConv::C
|| CallerCC
== CalleeCC
;
// Eligibility test for tail/sibling call optimization on 64-bit SVR4. The
// checks run in order: option flags, varargs, calling-convention
// compatibility, byval arguments on either side, stack-passed parameters
// across differing conventions, direct-call requirement, shared TOC base, and
// finally whether the callee needs stack slots for its arguments.
4618 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4620 CallingConv::ID CalleeCC
,
4621 ImmutableCallSite CS
,
4623 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
4624 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
4625 SelectionDAG
& DAG
) const {
4626 bool TailCallOpt
= getTargetMachine().Options
.GuaranteedTailCallOpt
;
// With sibling-call optimization disabled, only guaranteed TCO can apply.
4628 if (DisableSCO
&& !TailCallOpt
) return false;
4630 // Variadic argument functions are not supported.
4631 if (isVarArg
) return false;
4633 auto &Caller
= DAG
.getMachineFunction().getFunction();
4634 // Check that the calling conventions are compatible for tco.
4635 if (!areCallingConvEligibleForTCO_64SVR4(Caller
.getCallingConv(), CalleeCC
))
4638 // A caller that has any byval parameter is not supported.
4639 if (any_of(Ins
, [](const ISD::InputArg
&IA
) { return IA
.Flags
.isByVal(); }))
4642 // A callee that takes any byval parameter is not supported either.
4643 // Note: This is a quick work around, because in some cases, e.g.
4644 // caller's stack size > callee's stack size, we are still able to apply
4645 // sibling call optimization. For example, gcc is able to do SCO for caller1
4646 // in the following example, but not for caller2.
4651 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4655 // void caller1(struct test a, struct test c, struct test *b) {
4656 // callee(gTest, b); }
4657 // void caller2(struct test *b) { callee(gTest, b); }
4658 if (any_of(Outs
, [](const ISD::OutputArg
& OA
) { return OA
.Flags
.isByVal(); }))
4661 // If callee and caller use different calling conventions, we cannot pass
4662 // parameters on stack since offsets for the parameter area may be different.
4663 if (Caller
.getCallingConv() != CalleeCC
&&
4664 needStackSlotPassParameters(Subtarget
, Outs
))
4667 // No TCO/SCO on an indirect call because the caller has to restore its TOC.
4668 if (!isFunctionGlobalAddress(Callee
) &&
4669 !isa
<ExternalSymbolSDNode
>(Callee
))
4672 // If the caller and callee potentially have different TOC bases then we
4673 // cannot tail call since we need to restore the TOC pointer after the call.
4674 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4675 if (!callsShareTOCBase(&Caller
, Callee
, getTargetMachine()))
4678 // TCO allows altering callee ABI, so we don't have to check further.
4679 if (CalleeCC
== CallingConv::Fast
&& TailCallOpt
)
4682 if (DisableSCO
) return false;
4684 // If the callee uses the same argument list as the caller, then we can
4685 // apply SCO in this case. If it does not, we need to check whether the callee
4686 // needs stack for passing arguments.
4687 if (!hasSameArgumentList(&Caller
, CS
) &&
4688 needStackSlotPassParameters(Subtarget
, Outs
)) {
4695 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4696 /// for tail call optimization. Targets which want to do tail call
4697 /// optimization should implement this function.
/// This variant only considers calls when GuaranteedTailCallOpt is set and
/// both caller and callee use the fastcc calling convention.
4699 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee
,
4700 CallingConv::ID CalleeCC
,
4702 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
4703 SelectionDAG
& DAG
) const {
4704 if (!getTargetMachine().Options
.GuaranteedTailCallOpt
)
4707 // Variable argument functions are not supported.
4711 MachineFunction
&MF
= DAG
.getMachineFunction();
4712 CallingConv::ID CallerCC
= MF
.getFunction().getCallingConv();
4713 if (CalleeCC
== CallingConv::Fast
&& CallerCC
== CalleeCC
) {
4714 // Functions containing byval parameters are not supported.
4715 for (unsigned i
= 0; i
!= Ins
.size(); i
++) {
4716 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
4717 if (Flags
.isByVal()) return false;
4720 // Non-PIC/GOT tail calls are supported.
4721 if (getTargetMachine().getRelocationModel() != Reloc::PIC_
)
4724 // At the moment we can only do local tail calls (in same module, hidden
4725 // or protected) if we are generating PIC.
4726 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
))
4727 return G
->getGlobal()->hasHiddenVisibility()
4728 || G
->getGlobal()->hasProtectedVisibility();
4734 /// isBLACompatibleAddress - Return the immediate to use if the specified
4735 /// 32-bit value is representable in the immediate field of a BxA instruction.
/// Yields nullptr when \p Op is not a constant, when its low two bits are not
/// zero, or when it does not survive sign extension from 26 bits.
4736 static SDNode
*isBLACompatibleAddress(SDValue Op
, SelectionDAG
&DAG
) {
4737 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
);
4738 if (!C
) return nullptr;
4740 int Addr
= C
->getZExtValue();
4741 if ((Addr
& 3) != 0 || // Low 2 bits are implicitly zero.
4742 SignExtend32
<26>(Addr
) != Addr
)
4743 return nullptr; // Top 6 bits have to be sext of immediate.
// The immediate is the address with the two implicit zero bits shifted out.
4747 (int)C
->getZExtValue() >> 2, SDLoc(Op
),
4748 DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout()))
/// Describes one tail-call argument: the value itself (Arg) plus the frame
/// index node (FrameIdxOp) and frame index (FrameIdx) of its stack slot, as
/// read back by StoreTailCallArgumentsToStackSlot.
4754 struct TailCallArgumentInfo
{
4759 TailCallArgumentInfo() = default;
4762 } // end anonymous namespace
4764 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
/// One store per entry of \p TailCallArgs is appended to \p MemOpChains. Every
/// store hangs off the incoming \p Chain, so the stores are not ordered
/// relative to one another.
4765 static void StoreTailCallArgumentsToStackSlot(
4766 SelectionDAG
&DAG
, SDValue Chain
,
4767 const SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArgs
,
4768 SmallVectorImpl
<SDValue
> &MemOpChains
, const SDLoc
&dl
) {
4769 for (unsigned i
= 0, e
= TailCallArgs
.size(); i
!= e
; ++i
) {
4770 SDValue Arg
= TailCallArgs
[i
].Arg
;
4771 SDValue FIN
= TailCallArgs
[i
].FrameIdxOp
;
4772 int FI
= TailCallArgs
[i
].FrameIdx
;
4773 // Store relative to framepointer.
4774 MemOpChains
.push_back(DAG
.getStore(
4775 Chain
, dl
, Arg
, FIN
,
4776 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FI
)));
4780 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4781 /// the appropriate stack slot for the tail call optimized function call.
/// The new slots are fixed stack objects placed at \p SPDiff plus the
/// frame lowering's return-address / frame-pointer save offsets. The stores
/// are threaded through \p Chain; the frame pointer is only stored for the
/// Darwin ABI.
4782 static SDValue
EmitTailCallStoreFPAndRetAddr(SelectionDAG
&DAG
, SDValue Chain
,
4783 SDValue OldRetAddr
, SDValue OldFP
,
4784 int SPDiff
, const SDLoc
&dl
) {
4786 // Calculate the new stack slot for the return address.
4787 MachineFunction
&MF
= DAG
.getMachineFunction();
4788 const PPCSubtarget
&Subtarget
= MF
.getSubtarget
<PPCSubtarget
>();
4789 const PPCFrameLowering
*FL
= Subtarget
.getFrameLowering();
4790 bool isPPC64
= Subtarget
.isPPC64();
// Slots are pointer sized: 8 bytes on PPC64, 4 bytes otherwise.
4791 int SlotSize
= isPPC64
? 8 : 4;
4792 int NewRetAddrLoc
= SPDiff
+ FL
->getReturnSaveOffset();
4793 int NewRetAddr
= MF
.getFrameInfo().CreateFixedObject(SlotSize
,
4794 NewRetAddrLoc
, true);
4795 EVT VT
= isPPC64
? MVT::i64
: MVT::i32
;
4796 SDValue NewRetAddrFrIdx
= DAG
.getFrameIndex(NewRetAddr
, VT
);
4797 Chain
= DAG
.getStore(Chain
, dl
, OldRetAddr
, NewRetAddrFrIdx
,
4798 MachinePointerInfo::getFixedStack(MF
, NewRetAddr
));
4800 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4801 // slot as the FP is never overwritten.
4802 if (Subtarget
.isDarwinABI()) {
4803 int NewFPLoc
= SPDiff
+ FL
->getFramePointerSaveOffset();
4804 int NewFPIdx
= MF
.getFrameInfo().CreateFixedObject(SlotSize
, NewFPLoc
,
4806 SDValue NewFramePtrIdx
= DAG
.getFrameIndex(NewFPIdx
, VT
);
4807 Chain
= DAG
.getStore(Chain
, dl
, OldFP
, NewFramePtrIdx
,
4808 MachinePointerInfo::getFixedStack(
4809 DAG
.getMachineFunction(), NewFPIdx
));
4815 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4816 /// the position of the argument.
/// A fixed stack object sized to hold \p Arg is created at
/// \p ArgOffset + \p SPDiff, and a TailCallArgumentInfo describing it is
/// appended to \p TailCallArguments.
4818 CalculateTailCallArgDest(SelectionDAG
&DAG
, MachineFunction
&MF
, bool isPPC64
,
4819 SDValue Arg
, int SPDiff
, unsigned ArgOffset
,
4820 SmallVectorImpl
<TailCallArgumentInfo
>& TailCallArguments
) {
4821 int Offset
= ArgOffset
+ SPDiff
;
// Size of the argument in bytes, rounded up from its size in bits.
4822 uint32_t OpSize
= (Arg
.getValueSizeInBits() + 7) / 8;
4823 int FI
= MF
.getFrameInfo().CreateFixedObject(OpSize
, Offset
, true);
4824 EVT VT
= isPPC64
? MVT::i64
: MVT::i32
;
4825 SDValue FIN
= DAG
.getFrameIndex(FI
, VT
);
4826 TailCallArgumentInfo Info
;
4828 Info
.FrameIdxOp
= FIN
;
4830 TailCallArguments
.push_back(Info
);
4833 /// EmitTailCallLoadFPAndRetAddr - Emit load from frame pointer and return address
4834 /// stack slot. Returns the chain as result and the loaded values in
4835 /// LROpOut/FPOpOut. Used when tail calling.
4836 SDValue
PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4837 SelectionDAG
&DAG
, int SPDiff
, SDValue Chain
, SDValue
&LROpOut
,
4838 SDValue
&FPOpOut
, const SDLoc
&dl
) const {
4840 // Load the LR and FP stack slot for later adjusting.
4841 EVT VT
= Subtarget
.isPPC64() ? MVT::i64
: MVT::i32
;
// LROpOut first holds the frame index of the return-address slot, then the
// value loaded from it; the load's output chain becomes the new Chain.
4842 LROpOut
= getReturnAddrFrameIndex(DAG
);
4843 LROpOut
= DAG
.getLoad(VT
, dl
, Chain
, LROpOut
, MachinePointerInfo());
4844 Chain
= SDValue(LROpOut
.getNode(), 1);
4846 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4847 // slot as the FP is never overwritten.
4848 if (Subtarget
.isDarwinABI()) {
4849 FPOpOut
= getFramePointerFrameIndex(DAG
);
4850 FPOpOut
= DAG
.getLoad(VT
, dl
, Chain
, FPOpOut
, MachinePointerInfo());
4851 Chain
= SDValue(FPOpOut
.getNode(), 1);
4857 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4858 /// by "Src" to address "Dst". The size and alignment of the copy are taken
4859 /// from the byval information in "Flags". The copy will be passed as
4860 /// a byval function parameter.
4861 /// Sometimes what we are copying is the end of a larger object, the part that
4862 /// does not fit in registers.
4863 static SDValue
CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
,
4864 SDValue Chain
, ISD::ArgFlagsTy Flags
,
4865 SelectionDAG
&DAG
, const SDLoc
&dl
) {
// The byte count is materialized as an i32 constant.
4866 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), dl
, MVT::i32
);
4867 return DAG
.getMemcpy(Chain
, dl
, Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
4868 false, false, false, MachinePointerInfo(),
4869 MachinePointerInfo());
4872 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4874 static void LowerMemOpCallTo(
4875 SelectionDAG
&DAG
, MachineFunction
&MF
, SDValue Chain
, SDValue Arg
,
4876 SDValue PtrOff
, int SPDiff
, unsigned ArgOffset
, bool isPPC64
,
4877 bool isTailCall
, bool isVector
, SmallVectorImpl
<SDValue
> &MemOpChains
,
4878 SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArguments
, const SDLoc
&dl
) {
4879 EVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout());
// Stack pointer register: X1 as i64 or R1 as i32.
4884 StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
4886 StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
// Recompute the destination address as stack pointer + ArgOffset.
4887 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
,
4888 DAG
.getConstant(ArgOffset
, dl
, PtrVT
));
// Queue the store of the argument to its slot.
4890 MemOpChains
.push_back(
4891 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo()));
4892 // Calculate and remember argument location.
4893 } else CalculateTailCallArgDest(DAG
, MF
, isPPC64
, Arg
, SPDiff
, ArgOffset
,
// Final preparation before a tail call node: store the remembered tail-call
// arguments to their slots, store the return address (and frame pointer)
// via EmitTailCallStoreFPAndRetAddr, then close the call sequence. \p Chain
// and \p InFlag are updated in place.
4898 PrepareTailCall(SelectionDAG
&DAG
, SDValue
&InFlag
, SDValue
&Chain
,
4899 const SDLoc
&dl
, int SPDiff
, unsigned NumBytes
, SDValue LROp
,
4901 SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArguments
) {
4902 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4903 // might overwrite each other in case of tail call optimization.
4904 SmallVector
<SDValue
, 8> MemOpChains2
;
4905 // Do not flag preceding copytoreg stuff together with the following stuff.
4907 StoreTailCallArgumentsToStackSlot(DAG
, Chain
, TailCallArguments
,
// Merge the argument stores into a single chain.
4909 if (!MemOpChains2
.empty())
4910 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains2
);
4912 // Store the return address to the appropriate stack slot.
4913 Chain
= EmitTailCallStoreFPAndRetAddr(DAG
, Chain
, LROp
, FPOp
, SPDiff
, dl
);
4915 // Emit callseq_end just before tailcall node.
4916 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, dl
, true),
4917 DAG
.getIntPtrConstant(0, dl
, true), InFlag
, dl
);
// Result 1 of the callseq_end node becomes the new InFlag.
4918 InFlag
= Chain
.getValue(1);
4921 // Is this global address that of a function that can be called by name? (as
4922 // opposed to something that must hold a descriptor for an indirect call).
4923 static bool isFunctionGlobalAddress(SDValue Callee
) {
4924 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
// TLS address nodes are singled out before the function-type test.
4925 if (Callee
.getOpcode() == ISD::GlobalTLSAddress
||
4926 Callee
.getOpcode() == ISD::TargetGlobalTLSAddress
)
// Otherwise the answer is whether the global's value type is a function.
4929 return G
->getGlobal()->getValueType()->isFunctionTy();
4936 PrepareCall(SelectionDAG
&DAG
, SDValue
&Callee
, SDValue
&InFlag
, SDValue
&Chain
,
4937 SDValue CallSeqStart
, const SDLoc
&dl
, int SPDiff
, bool isTailCall
,
4938 bool isPatchPoint
, bool hasNest
,
4939 SmallVectorImpl
<std::pair
<unsigned, SDValue
>> &RegsToPass
,
4940 SmallVectorImpl
<SDValue
> &Ops
, std::vector
<EVT
> &NodeTys
,
4941 ImmutableCallSite CS
, const PPCSubtarget
&Subtarget
) {
4942 bool isPPC64
= Subtarget
.isPPC64();
4943 bool isSVR4ABI
= Subtarget
.isSVR4ABI();
4944 bool is64BitELFv1ABI
= isPPC64
&& isSVR4ABI
&& !Subtarget
.isELFv2ABI();
4945 bool isAIXABI
= Subtarget
.isAIXABI();
4947 EVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout());
4948 NodeTys
.push_back(MVT::Other
); // Returns a chain
4949 NodeTys
.push_back(MVT::Glue
); // Returns a flag for retval copy to use.
4951 unsigned CallOpc
= PPCISD::CALL
;
4953 bool needIndirectCall
= true;
4954 if (!isSVR4ABI
|| !isPPC64
)
4955 if (SDNode
*Dest
= isBLACompatibleAddress(Callee
, DAG
)) {
4956 // If this is an absolute destination address, use the munged value.
4957 Callee
= SDValue(Dest
, 0);
4958 needIndirectCall
= false;
4961 // PC-relative references to external symbols should go through $stub, unless
4962 // we're building with the leopard linker or later, which automatically
4963 // synthesizes these stubs.
4964 const TargetMachine
&TM
= DAG
.getTarget();
4965 const Module
*Mod
= DAG
.getMachineFunction().getFunction().getParent();
4966 const GlobalValue
*GV
= nullptr;
4967 if (auto *G
= dyn_cast
<GlobalAddressSDNode
>(Callee
))
4968 GV
= G
->getGlobal();
4969 bool Local
= TM
.shouldAssumeDSOLocal(*Mod
, GV
);
4970 bool UsePlt
= !Local
&& Subtarget
.isTargetELF() && !isPPC64
;
4972 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4973 // every direct call is) turn it into a TargetGlobalAddress /
4974 // TargetExternalSymbol node so that legalize doesn't hack it.
4975 if (isFunctionGlobalAddress(Callee
)) {
4976 GlobalAddressSDNode
*G
= cast
<GlobalAddressSDNode
>(Callee
);
4978 // A call to a TLS address is actually an indirect call to a
4979 // thread-specific pointer.
4980 unsigned OpFlags
= 0;
4982 OpFlags
= PPCII::MO_PLT
;
4984 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
,
4985 Callee
.getValueType(), 0, OpFlags
);
4986 needIndirectCall
= false;
4989 if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
4990 unsigned char OpFlags
= 0;
4993 OpFlags
= PPCII::MO_PLT
;
4995 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), Callee
.getValueType(),
4997 needIndirectCall
= false;
5001 // We'll form an invalid direct call when lowering a patchpoint; the full
5002 // sequence for an indirect call is complicated, and many of the
5003 // instructions introduced might have side effects (and, thus, can't be
5004 // removed later). The call itself will be removed as soon as the
5005 // argument/return lowering is complete, so the fact that it has the wrong
5006 // kind of operands should not really matter.
5007 needIndirectCall
= false;
5010 if (needIndirectCall
) {
5011 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
5012 // to do the call, we can't use PPCISD::CALL.
5013 SDValue MTCTROps
[] = {Chain
, Callee
, InFlag
};
5015 if (is64BitELFv1ABI
) {
5016 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5017 // entry point, but to the function descriptor (the function entry point
5018 // address is part of the function descriptor though).
5019 // The function descriptor is a three doubleword structure with the
5020 // following fields: function entry point, TOC base address and
5021 // environment pointer.
5022 // Thus for a call through a function pointer, the following actions need
5024 // 1. Save the TOC of the caller in the TOC save area of its stack
5025 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5026 // 2. Load the address of the function entry point from the function
5028 // 3. Load the TOC of the callee from the function descriptor into r2.
5029 // 4. Load the environment pointer from the function descriptor into
5031 // 5. Branch to the function entry point address.
5032 // 6. On return of the callee, the TOC of the caller needs to be
5033 // restored (this is done in FinishCall()).
5035 // The loads are scheduled at the beginning of the call sequence, and the
5036 // register copies are flagged together to ensure that no other
5037 // operations can be scheduled in between. E.g. without flagging the
5038 // copies together, a TOC access in the caller could be scheduled between
5039 // the assignment of the callee TOC and the branch to the callee, which
5040 // results in the TOC access going through the TOC of the callee instead
5041 // of going through the TOC of the caller, which leads to incorrect code.
5043 // Load the address of the function entry point from the function
5045 SDValue LDChain
= CallSeqStart
.getValue(CallSeqStart
->getNumValues()-1);
5046 if (LDChain
.getValueType() == MVT::Glue
)
5047 LDChain
= CallSeqStart
.getValue(CallSeqStart
->getNumValues()-2);
5049 auto MMOFlags
= Subtarget
.hasInvariantFunctionDescriptors()
5050 ? (MachineMemOperand::MODereferenceable
|
5051 MachineMemOperand::MOInvariant
)
5052 : MachineMemOperand::MONone
;
5054 MachinePointerInfo
MPI(CS
? CS
.getCalledValue() : nullptr);
5055 SDValue LoadFuncPtr
= DAG
.getLoad(MVT::i64
, dl
, LDChain
, Callee
, MPI
,
5056 /* Alignment = */ 8, MMOFlags
);
5058 // Load environment pointer into r11.
5059 SDValue PtrOff
= DAG
.getIntPtrConstant(16, dl
);
5060 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, Callee
, PtrOff
);
5061 SDValue LoadEnvPtr
=
5062 DAG
.getLoad(MVT::i64
, dl
, LDChain
, AddPtr
, MPI
.getWithOffset(16),
5063 /* Alignment = */ 8, MMOFlags
);
5065 SDValue TOCOff
= DAG
.getIntPtrConstant(8, dl
);
5066 SDValue AddTOC
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, Callee
, TOCOff
);
5068 DAG
.getLoad(MVT::i64
, dl
, LDChain
, AddTOC
, MPI
.getWithOffset(8),
5069 /* Alignment = */ 8, MMOFlags
);
5071 setUsesTOCBasePtr(DAG
);
5072 SDValue TOCVal
= DAG
.getCopyToReg(Chain
, dl
, PPC::X2
, TOCPtr
,
5074 Chain
= TOCVal
.getValue(0);
5075 InFlag
= TOCVal
.getValue(1);
5077 // If the function call has an explicit 'nest' parameter, it takes the
5078 // place of the environment pointer.
5080 SDValue EnvVal
= DAG
.getCopyToReg(Chain
, dl
, PPC::X11
, LoadEnvPtr
,
5083 Chain
= EnvVal
.getValue(0);
5084 InFlag
= EnvVal
.getValue(1);
5087 MTCTROps
[0] = Chain
;
5088 MTCTROps
[1] = LoadFuncPtr
;
5089 MTCTROps
[2] = InFlag
;
5092 Chain
= DAG
.getNode(PPCISD::MTCTR
, dl
, NodeTys
,
5093 makeArrayRef(MTCTROps
, InFlag
.getNode() ? 3 : 2));
5094 InFlag
= Chain
.getValue(1);
5097 NodeTys
.push_back(MVT::Other
);
5098 NodeTys
.push_back(MVT::Glue
);
5099 Ops
.push_back(Chain
);
5100 CallOpc
= PPCISD::BCTRL
;
5101 Callee
.setNode(nullptr);
5102 // Add use of X11 (holding environment pointer)
5103 if (is64BitELFv1ABI
&& !hasNest
)
5104 Ops
.push_back(DAG
.getRegister(PPC::X11
, PtrVT
));
5105 // Add CTR register as callee so a bctr can be emitted later.
5107 Ops
.push_back(DAG
.getRegister(isPPC64
? PPC::CTR8
: PPC::CTR
, PtrVT
));
5110 // If this is a direct call, pass the chain and the callee.
5111 if (Callee
.getNode()) {
5112 Ops
.push_back(Chain
);
5113 Ops
.push_back(Callee
);
5115 // If this is a tail call add stack pointer delta.
5117 Ops
.push_back(DAG
.getConstant(SPDiff
, dl
, MVT::i32
));
5119 // Add argument registers to the end of the list so that they are known live
5121 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
)
5122 Ops
.push_back(DAG
.getRegister(RegsToPass
[i
].first
,
5123 RegsToPass
[i
].second
.getValueType()));
5125 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5126 // live into the call.
5127 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5128 if ((isSVR4ABI
&& isPPC64
) || isAIXABI
) {
5129 setUsesTOCBasePtr(DAG
);
5131 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5132 // no way to mark dependencies as implicit here.
5133 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5135 Ops
.push_back(DAG
.getRegister(isPPC64
? PPC::X2
// LowerCallResult - Copy the values produced by a call out of the physical
// registers they were returned in and append them, in order, to InVals.
//
// The return locations are computed into RVLocs by
// CCRetInfo.AnalyzeCallResult() over Ins; the analysis argument depends on
// whether this is an SVR4 call using the Cold calling convention (the
// alternatives it selects between are not visible in this view). Every
// location is asserted to be a register location.
//
// Each register copy takes Chain and InFlag as inputs and then replaces them
// with result values 1 and 2 of the copy, so consecutive copies stay ordered
// and glued. After the copy, a value whose location info is AExt/ZExt/SExt is
// truncated back to its value type; for ZExt/SExt an AssertZext/AssertSext on
// the location type is emitted first.
5142 SDValue
PPCTargetLowering::LowerCallResult(
5143 SDValue Chain
, SDValue InFlag
, CallingConv::ID CallConv
, bool isVarArg
,
5144 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5145 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
5146 SmallVector
<CCValAssign
, 16> RVLocs
;
5147 CCState
CCRetInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), RVLocs
,
5150 CCRetInfo
.AnalyzeCallResult(
5151 Ins
, (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
5155 // Copy all of the result registers out of their specified physreg.
for (unsigned i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
5157 CCValAssign
&VA
= RVLocs
[i
];
5158 assert(VA
.isRegLoc() && "Can only return in registers!");
// With SPE, an f64 result occupies two return locations: each one is read
// as an i32 register copy and the pair is recombined into an f64 with
// PPCISD::BUILD_SPE64.
5162 if (Subtarget
.hasSPE() && VA
.getLocVT() == MVT::f64
) {
5163 SDValue Lo
= DAG
.getCopyFromReg(Chain
, dl
, VA
.getLocReg(), MVT::i32
,
5165 Chain
= Lo
.getValue(1);
5166 InFlag
= Lo
.getValue(2);
// NOTE(review): VA is a reference bound to RVLocs[i], so this assignment
// copies the next location over that element instead of rebinding VA.
5167 VA
= RVLocs
[++i
]; // skip ahead to next loc
5168 SDValue Hi
= DAG
.getCopyFromReg(Chain
, dl
, VA
.getLocReg(), MVT::i32
,
5170 Chain
= Hi
.getValue(1);
5171 InFlag
= Hi
.getValue(2);
5172 if (!Subtarget
.isLittleEndian())
5174 Val
= DAG
.getNode(PPCISD::BUILD_SPE64
, dl
, MVT::f64
, Lo
, Hi
);
5176 Val
= DAG
.getCopyFromReg(Chain
, dl
,
5177 VA
.getLocReg(), VA
.getLocVT(), InFlag
);
5178 Chain
= Val
.getValue(1);
5179 InFlag
= Val
.getValue(2);
// Undo any extension that was applied to fit the value into its location:
// the value is truncated from the location type back to the value type.
5182 switch (VA
.getLocInfo()) {
5183 default: llvm_unreachable("Unknown loc info!");
5184 case CCValAssign::Full
: break;
5185 case CCValAssign::AExt
:
5186 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5188 case CCValAssign::ZExt
:
5189 Val
= DAG
.getNode(ISD::AssertZext
, dl
, VA
.getLocVT(), Val
,
5190 DAG
.getValueType(VA
.getValVT()));
5191 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5193 case CCValAssign::SExt
:
5194 Val
= DAG
.getNode(ISD::AssertSext
, dl
, VA
.getLocVT(), Val
,
5195 DAG
.getValueType(VA
.getValVT()));
5196 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5200 InVals
.push_back(Val
);
// FinishCall - Emit the call node itself and everything that follows it.
//
// PrepareCall() picks the call opcode and fills in Ops and NodeTys. This
// function then appends, in order: the CR1EQ register for 32-bit SVR4 vararg
// calls, the call-preserved register mask for CallConv, and InFlag if it is
// set.
//
// On the tail-call path a PPCISD::TC_RETURN node is returned directly (the
// line guarding that path is not visible in this view). Otherwise, for
// non-patchpoint calls on 64-bit SVR4 or AIX:
//   * PPCISD::BCTRL becomes PPCISD::BCTRL_LOAD_TOC, with the address of the
//     TOC save slot (X1 + getTOCSaveOffset()) inserted right after the first
//     operand; on AIX this case is a fatal error instead.
//   * PPCISD::CALL becomes PPCISD::CALL_NOP when callsShareTOCBase() reports
//     that caller and callee do not share a TOC base.
// The call node is then created, closed with CALLSEQ_END (NumBytes and
// BytesCalleePops), and its results are collected through LowerCallResult(),
// whose return value is returned.
5206 SDValue
PPCTargetLowering::FinishCall(
5207 CallingConv::ID CallConv
, const SDLoc
&dl
, bool isTailCall
, bool isVarArg
,
5208 bool isPatchPoint
, bool hasNest
, SelectionDAG
&DAG
,
5209 SmallVector
<std::pair
<unsigned, SDValue
>, 8> &RegsToPass
, SDValue InFlag
,
5210 SDValue Chain
, SDValue CallSeqStart
, SDValue
&Callee
, int SPDiff
,
5211 unsigned NumBytes
, const SmallVectorImpl
<ISD::InputArg
> &Ins
,
5212 SmallVectorImpl
<SDValue
> &InVals
, ImmutableCallSite CS
) const {
5213 std::vector
<EVT
> NodeTys
;
5214 SmallVector
<SDValue
, 8> Ops
;
5215 unsigned CallOpc
= PrepareCall(DAG
, Callee
, InFlag
, Chain
, CallSeqStart
, dl
,
5216 SPDiff
, isTailCall
, isPatchPoint
, hasNest
,
5217 RegsToPass
, Ops
, NodeTys
, CS
, Subtarget
);
5219 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5220 if (isVarArg
&& Subtarget
.isSVR4ABI() && !Subtarget
.isPPC64())
5221 Ops
.push_back(DAG
.getRegister(PPC::CR1EQ
, MVT::i32
));
5223 // When performing tail call optimization the callee pops its arguments off
5224 // the stack. Account for this here so these bytes can be pushed back on in
5225 // PPCFrameLowering::eliminateCallFramePseudoInstr.
// This is NumBytes only for fastcc with GuaranteedTailCallOpt, else 0.
5226 int BytesCalleePops
=
5227 (CallConv
== CallingConv::Fast
&&
5228 getTargetMachine().Options
.GuaranteedTailCallOpt
) ? NumBytes
: 0;
5230 // Add a register mask operand representing the call-preserved registers.
5231 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
5232 const uint32_t *Mask
=
5233 TRI
->getCallPreservedMask(DAG
.getMachineFunction(), CallConv
);
5234 assert(Mask
&& "Missing call preserved mask for calling convention");
5235 Ops
.push_back(DAG
.getRegisterMask(Mask
));
5237 if (InFlag
.getNode())
5238 Ops
.push_back(InFlag
);
// Tail-call emission: sanity-check the kind of callee node, record on the
// frame info that this function has a tail call, and return TC_RETURN.
5242 assert(((Callee
.getOpcode() == ISD::Register
&&
5243 cast
<RegisterSDNode
>(Callee
)->getReg() == PPC::CTR
) ||
5244 Callee
.getOpcode() == ISD::TargetExternalSymbol
||
5245 Callee
.getOpcode() == ISD::TargetGlobalAddress
||
5246 isa
<ConstantSDNode
>(Callee
)) &&
5247 "Expecting an global address, external symbol, absolute value or register");
5249 DAG
.getMachineFunction().getFrameInfo().setHasTailCall();
5250 return DAG
.getNode(PPCISD::TC_RETURN
, dl
, MVT::Other
, Ops
);
5253 // Add a NOP immediately after the branch instruction when using the 64-bit
5254 // SVR4 or the AIX ABI.
5255 // At link time, if caller and callee are in a different module and
5256 // thus have a different TOC, the call will be replaced with a call to a stub
5257 // function which saves the current TOC, loads the TOC of the callee and
5258 // branches to the callee. The NOP will be replaced with a load instruction
5259 // which restores the TOC of the caller from the TOC save slot of the current
5260 // stack frame. If caller and callee belong to the same module (and have the
5261 // same TOC), the NOP will remain unchanged, or become some other NOP.
5263 MachineFunction
&MF
= DAG
.getMachineFunction();
5264 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5265 if (!isTailCall
&& !isPatchPoint
&&
5266 ((Subtarget
.isSVR4ABI() && Subtarget
.isPPC64()) ||
5267 Subtarget
.isAIXABI())) {
5268 if (CallOpc
== PPCISD::BCTRL
) {
5269 if (Subtarget
.isAIXABI())
5270 report_fatal_error("Indirect call on AIX is not implemented.");
5272 // This is a call through a function pointer.
5273 // Restore the caller TOC from the save area into R2.
5274 // See PrepareCall() for more information about calls through function
5275 // pointers in the 64-bit SVR4 ABI.
5276 // We are using a target-specific load with r2 hard coded, because the
5277 // result of a target-independent load would never go directly into r2,
5278 // since r2 is a reserved register (which prevents the register allocator
5279 // from allocating it), resulting in an additional register being
5280 // allocated and an unnecessary move instruction being generated.
5281 CallOpc
= PPCISD::BCTRL_LOAD_TOC
;
// Address of the TOC save slot: stack pointer (X1) plus the save offset.
5283 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, PtrVT
);
5284 unsigned TOCSaveOffset
= Subtarget
.getFrameLowering()->getTOCSaveOffset();
5285 SDValue TOCOff
= DAG
.getIntPtrConstant(TOCSaveOffset
, dl
);
5286 SDValue AddTOC
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, StackPtr
, TOCOff
);
5288 // The address needs to go after the chain input but before the flag (or
5289 // any other variadic arguments).
5290 Ops
.insert(std::next(Ops
.begin()), AddTOC
);
5291 } else if (CallOpc
== PPCISD::CALL
&&
5292 !callsShareTOCBase(&MF
.getFunction(), Callee
, DAG
.getTarget())) {
5293 // Otherwise insert NOP for non-local calls.
5294 CallOpc
= PPCISD::CALL_NOP
;
5298 if (Subtarget
.isAIXABI() && isFunctionGlobalAddress(Callee
)) {
5299 // On AIX, direct function calls reference the symbol for the function's
5300 // entry point, which is named by inserting a "." before the function's
// name; the symbol is built below as "." followed by the global's name.
5302 GlobalAddressSDNode
*G
= cast
<GlobalAddressSDNode
>(Callee
);
5303 auto &Context
= DAG
.getMachineFunction().getMMI().getContext();
5304 MCSymbol
*S
= Context
.getOrCreateSymbol(Twine(".") +
5305 Twine(G
->getGlobal()->getName()));
5306 Callee
= DAG
.getMCSymbol(S
, PtrVT
);
5307 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
// Emit the call; result value 1 of the call node is used as the new InFlag.
5311 Chain
= DAG
.getNode(CallOpc
, dl
, NodeTys
, Ops
);
5312 InFlag
= Chain
.getValue(1);
// Close the call sequence that was opened by the caller's CALLSEQ_START.
5314 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, dl
, true),
5315 DAG
.getIntPtrConstant(BytesCalleePops
, dl
, true),
5318 InFlag
= Chain
.getValue(1);
5320 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
,
5321 Ins
, dl
, DAG
, InVals
);
// LowerCall - Top-level entry point for lowering a call described by CLI.
//
// It unpacks the CallLoweringInfo, decides whether the call may be emitted as
// a tail call, and then dispatches to the ABI-specific routine:
// LowerCall_64SVR4 (SVR4 ABI on PPC64), LowerCall_32SVR4 (other SVR4),
// LowerCall_AIX (AIX ABI), or LowerCall_Darwin as the fallback.
//
// isTailCall is a reference to CLI.IsTailCall, so the eligibility decision
// made here is written back into CLI. A call site marked musttail that ends up
// not being a tail call is reported as a fatal error.
5325 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo
&CLI
,
5326 SmallVectorImpl
<SDValue
> &InVals
) const {
5327 SelectionDAG
&DAG
= CLI
.DAG
;
5329 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
5330 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
5331 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
5332 SDValue Chain
= CLI
.Chain
;
5333 SDValue Callee
= CLI
.Callee
;
5334 bool &isTailCall
= CLI
.IsTailCall
;
5335 CallingConv::ID CallConv
= CLI
.CallConv
;
5336 bool isVarArg
= CLI
.IsVarArg
;
5337 bool isPatchPoint
= CLI
.IsPatchPoint
;
5338 ImmutableCallSite CS
= CLI
.CS
;
// Tail-call eligibility. Three cases are distinguished: long calls on a site
// that is not musttail, 64-bit SVR4 (checked by
// IsEligibleForTailCallOptimization_64SVR4), and everything else (checked by
// IsEligibleForTailCallOptimization).
5341 if (Subtarget
.useLongCalls() && !(CS
&& CS
.isMustTailCall()))
5343 else if (Subtarget
.isSVR4ABI() && Subtarget
.isPPC64())
5345 IsEligibleForTailCallOptimization_64SVR4(Callee
, CallConv
, CS
,
5346 isVarArg
, Outs
, Ins
, DAG
);
5348 isTailCall
= IsEligibleForTailCallOptimization(Callee
, CallConv
, isVarArg
,
5352 if (!getTargetMachine().Options
.GuaranteedTailCallOpt
)
5355 assert(isa
<GlobalAddressSDNode
>(Callee
) &&
5356 "Callee should be an llvm::Function object.");
// Debug output: caller name padded to Width, then the callee's visibility
// and linkage values.
5358 const GlobalValue
*GV
=
5359 cast
<GlobalAddressSDNode
>(Callee
)->getGlobal();
5360 const unsigned Width
=
5361 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5362 dbgs() << "TCO caller: "
5363 << left_justify(DAG
.getMachineFunction().getName(), Width
)
5364 << ", callee linkage: " << GV
->getVisibility() << ", "
5365 << GV
->getLinkage() << "\n");
5369 if (!isTailCall
&& CS
&& CS
.isMustTailCall())
5370 report_fatal_error("failed to perform tail call elimination on a call "
5371 "site marked musttail");
5373 // When long calls (i.e. indirect calls) are always used, calls are always
5374 // made via function pointer. If we have a function name, first translate it
5376 if (Subtarget
.useLongCalls() && isa
<GlobalAddressSDNode
>(Callee
) &&
5378 Callee
= LowerGlobalAddress(Callee
, DAG
);
// Dispatch on the ABI; the order of these checks matters because the
// 64-bit SVR4 test is a refinement of the plain SVR4 test that follows it.
5380 if (Subtarget
.isSVR4ABI() && Subtarget
.isPPC64())
5381 return LowerCall_64SVR4(Chain
, Callee
, CallConv
, isVarArg
,
5382 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5383 dl
, DAG
, InVals
, CS
);
5385 if (Subtarget
.isSVR4ABI())
5386 return LowerCall_32SVR4(Chain
, Callee
, CallConv
, isVarArg
,
5387 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5388 dl
, DAG
, InVals
, CS
);
5390 if (Subtarget
.isAIXABI())
5391 return LowerCall_AIX(Chain
, Callee
, CallConv
, isVarArg
,
5392 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5393 dl
, DAG
, InVals
, CS
);
5395 return LowerCall_Darwin(Chain
, Callee
, CallConv
, isVarArg
,
5396 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5397 dl
, DAG
, InVals
, CS
);
// LowerCall_32SVR4 - Lower a call for the 32-bit SVR4 ABI.
//
// Only the C, Cold and Fast calling conventions are accepted (asserted).
// The steps are:
//   1. Assign a location to every outgoing argument with CCInfo, after
//      reserving the linkage area. Arguments are analyzed either one by one
//      (CC_PPC32_SVR4 for fixed arguments, CC_PPC32_SVR4_VarArg otherwise)
//      or all at once with CC_PPC32_SVR4.
//   2. Assign stack space for by-value aggregates with a second CCState
//      (CCByValInfo) that starts at the stack offset where CCInfo stopped;
//      its final offset is NumBytes, the size passed to CALLSEQ_START.
//   3. Compute SPDiff, open the call sequence and, via
//      EmitTailCallLoadFPAndRetAddr(), obtain LROp/FPOp.
//   4. Walk ArgLocs: copy by-value aggregates (outside the call sequence),
//      extend i1 values, queue register arguments in RegsToPass and emit
//      stores / tail-call destinations for memory arguments.
//   5. Join the stores with a TokenFactor, emit the glued CopyToReg chain,
//      set or clear CR6 depending on seenFloatArg, and hand over to
//      PrepareTailCall() and FinishCall().
5400 SDValue
PPCTargetLowering::LowerCall_32SVR4(
5401 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
5402 bool isTailCall
, bool isPatchPoint
,
5403 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
5404 const SmallVectorImpl
<SDValue
> &OutVals
,
5405 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5406 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
5407 ImmutableCallSite CS
) const {
5408 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5409 // of the 32-bit SVR4 ABI stack frame layout.
5411 assert((CallConv
== CallingConv::C
||
5412 CallConv
== CallingConv::Cold
||
5413 CallConv
== CallingConv::Fast
) && "Unknown calling convention!");
5415 unsigned PtrByteSize
= 4;
5417 MachineFunction
&MF
= DAG
.getMachineFunction();
5419 // Mark this function as potentially containing a function that contains a
5420 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5421 // and restoring the callers stack pointer in this functions epilog. This is
5422 // done because by tail calling the called function might overwrite the value
5423 // in this function's (MF) stack pointer stack slot 0(SP).
5424 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5425 CallConv
== CallingConv::Fast
)
5426 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
5428 // Count how many bytes are to be pushed on the stack, including the linkage
5429 // area, parameter list area and the part of the local variable space which
5430 // contains copies of aggregates which are passed by value.
5432 // Assign locations to all of the outgoing arguments.
5433 SmallVector
<CCValAssign
, 16> ArgLocs
;
5434 PPCCCState
CCInfo(CallConv
, isVarArg
, MF
, ArgLocs
, *DAG
.getContext());
5436 // Reserve space for the linkage area on the stack.
5437 CCInfo
.AllocateStack(Subtarget
.getFrameLowering()->getLinkageSize(),
5440 CCInfo
.PreAnalyzeCallOperands(Outs
);
5443 // Handle fixed and variable vector arguments differently.
5444 // Fixed vector arguments go into registers as long as registers are
5445 // available. Variable vector arguments always go into memory.
5446 unsigned NumArgs
= Outs
.size();
5448 for (unsigned i
= 0; i
!= NumArgs
; ++i
) {
5449 MVT ArgVT
= Outs
[i
].VT
;
5450 ISD::ArgFlagsTy ArgFlags
= Outs
[i
].Flags
;
// Fixed arguments use CC_PPC32_SVR4, the rest CC_PPC32_SVR4_VarArg.
5453 if (Outs
[i
].IsFixed
) {
5454 Result
= CC_PPC32_SVR4(i
, ArgVT
, ArgVT
, CCValAssign::Full
, ArgFlags
,
5457 Result
= CC_PPC32_SVR4_VarArg(i
, ArgVT
, ArgVT
, CCValAssign::Full
,
5463 errs() << "Call operand #" << i
<< " has unhandled type "
5464 << EVT(ArgVT
).getEVTString() << "\n";
5466 llvm_unreachable(nullptr);
5470 // All arguments are treated the same.
5471 CCInfo
.AnalyzeCallOperands(Outs
, CC_PPC32_SVR4
);
5473 CCInfo
.clearWasPPCF128();
5475 // Assign locations to all of the outgoing aggregate by value arguments.
5476 SmallVector
<CCValAssign
, 16> ByValArgLocs
;
5477 CCState
CCByValInfo(CallConv
, isVarArg
, MF
, ByValArgLocs
, *DAG
.getContext());
5479 // Reserve stack space for the allocations in CCInfo.
5480 CCByValInfo
.AllocateStack(CCInfo
.getNextStackOffset(), PtrByteSize
);
5482 CCByValInfo
.AnalyzeCallOperands(Outs
, CC_PPC32_SVR4_ByVal
);
5484 // Size of the linkage area, parameter list area and the part of the local
5485 // space variable where copies of aggregates which are passed by value are
5487 unsigned NumBytes
= CCByValInfo
.getNextStackOffset();
5489 // Calculate by how many bytes the stack has to be adjusted in case of tail
5490 // call optimization.
5491 int SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
5493 // Adjust the stack pointer for the new arguments...
5494 // These operations are automatically eliminated by the prolog/epilog pass
5495 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
5496 SDValue CallSeqStart
= Chain
;
5498 // Load the return address and frame pointer so it can be moved somewhere else
5501 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
5503 // Set up a copy of the stack pointer for use loading and storing any
5504 // arguments that may not fit in the registers available for argument
5506 SDValue StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
5508 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
5509 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
5510 SmallVector
<SDValue
, 8> MemOpChains
;
5512 bool seenFloatArg
= false;
5513 // Walk the register/memloc assignments, inserting copies/loads.
5514 // i - Tracks the index into the list of registers allocated for the call
5515 // RealArgIdx - Tracks the index into the list of actual function arguments
5516 // j - Tracks the index into the list of byval arguments
5517 for (unsigned i
= 0, RealArgIdx
= 0, j
= 0, e
= ArgLocs
.size();
5519 ++i
, ++RealArgIdx
) {
5520 CCValAssign
&VA
= ArgLocs
[i
];
5521 SDValue Arg
= OutVals
[RealArgIdx
];
5522 ISD::ArgFlagsTy Flags
= Outs
[RealArgIdx
].Flags
;
5524 if (Flags
.isByVal()) {
5525 // Argument is an aggregate which is passed by value, thus we need to
5526 // create a copy of it in the local variable space of the current stack
5527 // frame (which is the stack frame of the caller) and pass the address of
5528 // this copy to the callee.
5529 assert((j
< ByValArgLocs
.size()) && "Index out of bounds!");
5530 CCValAssign
&ByValVA
= ByValArgLocs
[j
++];
5531 assert((VA
.getValNo() == ByValVA
.getValNo()) && "ValNo mismatch!");
5533 // Memory reserved in the local variable space of the callers stack frame.
5534 unsigned LocMemOffset
= ByValVA
.getLocMemOffset();
5536 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
, dl
);
5537 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(MF
.getDataLayout()),
5540 // Create a copy of the argument in the local area of the current
// The copy is chained on operand 0 of CallSeqStart, i.e. on the chain
// that entered the call sequence.
5542 SDValue MemcpyCall
=
5543 CreateCopyOfByValArgument(Arg
, PtrOff
,
5544 CallSeqStart
.getNode()->getOperand(0),
5547 // This must go outside the CALLSEQ_START..END.
// A new CALLSEQ_START chained after the copy replaces every use of the
// old one.
5548 SDValue NewCallSeqStart
= DAG
.getCALLSEQ_START(MemcpyCall
, NumBytes
, 0,
5550 DAG
.ReplaceAllUsesWith(CallSeqStart
.getNode(),
5551 NewCallSeqStart
.getNode());
5552 Chain
= CallSeqStart
= NewCallSeqStart
;
5554 // Pass the address of the aggregate copy on the stack either in a
5555 // physical register or in the parameter list area of the current stack
5556 // frame to the callee.
5560 // When useCRBits() is true, there can be i1 arguments.
5561 // It is because getRegisterType(MVT::i1) => MVT::i1,
5562 // and for other integer types getRegisterType() => MVT::i32.
5563 // Extend i1 and ensure callee will get i32.
5564 if (Arg
.getValueType() == MVT::i1
)
5565 Arg
= DAG
.getNode(Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
,
5568 if (VA
.isRegLoc()) {
5569 seenFloatArg
|= VA
.getLocVT().isFloatingPoint();
5570 // Put argument in a physical register.
// With SPE, an f64 is split by two PPCISD::EXTRACT_SPE nodes into i32
// halves that go into this location's register and the next one's; the
// half indices are swapped between little- and big-endian.
5571 if (Subtarget
.hasSPE() && Arg
.getValueType() == MVT::f64
) {
5572 bool IsLE
= Subtarget
.isLittleEndian();
5573 SDValue SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
5574 DAG
.getIntPtrConstant(IsLE
? 0 : 1, dl
));
5575 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), SVal
.getValue(0)));
5576 SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
5577 DAG
.getIntPtrConstant(IsLE
? 1 : 0, dl
));
5578 RegsToPass
.push_back(std::make_pair(ArgLocs
[++i
].getLocReg(),
5581 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
5583 // Put argument in the parameter list area of the current stack frame.
5584 assert(VA
.isMemLoc());
5585 unsigned LocMemOffset
= VA
.getLocMemOffset();
5588 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
, dl
);
5589 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(MF
.getDataLayout()),
5592 MemOpChains
.push_back(
5593 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo()));
5595 // Calculate and remember argument location.
5596 CalculateTailCallArgDest(DAG
, MF
, false, Arg
, SPDiff
, LocMemOffset
,
// Merge all argument stores into a single chain before the register copies.
5602 if (!MemOpChains
.empty())
5603 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
5605 // Build a sequence of copy-to-reg nodes chained together with token chain
5606 // and flag operands which copy the outgoing args into the appropriate regs.
5608 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
5609 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
5610 RegsToPass
[i
].second
, InFlag
);
5611 InFlag
= Chain
.getValue(1);
5614 // Set CR bit 6 to true if this is a vararg call with floating args passed in
// InFlag is passed as an operand only when it is set (2 operands vs. 1).
5617 SDVTList VTs
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
5618 SDValue Ops
[] = { Chain
, InFlag
};
5620 Chain
= DAG
.getNode(seenFloatArg
? PPCISD::CR6SET
: PPCISD::CR6UNSET
,
5621 dl
, VTs
, makeArrayRef(Ops
, InFlag
.getNode() ? 2 : 1));
5623 InFlag
= Chain
.getValue(1);
5627 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
5630 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
5631 /* unused except on PPC64 ELFv1 */ false, DAG
,
5632 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
5633 NumBytes
, Ins
, InVals
, CS
);
5636 // Copy an argument into memory, being careful to do this outside the
5637 // call sequence for the call to which the argument belongs.
//
// Arg is copied to PtrOff by CreateCopyOfByValArgument(), chained on operand 0
// of CallSeqStart (the chain that entered the call sequence). A new
// CALLSEQ_START with the same frame size (constant operand 1 of the old one)
// is then created on top of the copy, every use of the old CALLSEQ_START node
// is redirected to it, and the new CALLSEQ_START is returned.
5638 SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(
5639 SDValue Arg
, SDValue PtrOff
, SDValue CallSeqStart
, ISD::ArgFlagsTy Flags
,
5640 SelectionDAG
&DAG
, const SDLoc
&dl
) const {
5641 SDValue MemcpyCall
= CreateCopyOfByValArgument(Arg
, PtrOff
,
5642 CallSeqStart
.getNode()->getOperand(0),
5644 // The MEMCPY must go outside the CALLSEQ_START..END.
// Reuse the frame size recorded on the original CALLSEQ_START.
5645 int64_t FrameSize
= CallSeqStart
.getConstantOperandVal(1);
5646 SDValue NewCallSeqStart
= DAG
.getCALLSEQ_START(MemcpyCall
, FrameSize
, 0,
5648 DAG
.ReplaceAllUsesWith(CallSeqStart
.getNode(),
5649 NewCallSeqStart
.getNode());
5650 return NewCallSeqStart
;
5653 SDValue
PPCTargetLowering::LowerCall_64SVR4(
5654 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
5655 bool isTailCall
, bool isPatchPoint
,
5656 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
5657 const SmallVectorImpl
<SDValue
> &OutVals
,
5658 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5659 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
5660 ImmutableCallSite CS
) const {
5661 bool isELFv2ABI
= Subtarget
.isELFv2ABI();
5662 bool isLittleEndian
= Subtarget
.isLittleEndian();
5663 unsigned NumOps
= Outs
.size();
5664 bool hasNest
= false;
5665 bool IsSibCall
= false;
5667 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5668 unsigned PtrByteSize
= 8;
5670 MachineFunction
&MF
= DAG
.getMachineFunction();
5672 if (isTailCall
&& !getTargetMachine().Options
.GuaranteedTailCallOpt
)
5675 // Mark this function as potentially containing a function that contains a
5676 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5677 // and restoring the callers stack pointer in this functions epilog. This is
5678 // done because by tail calling the called function might overwrite the value
5679 // in this function's (MF) stack pointer stack slot 0(SP).
5680 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5681 CallConv
== CallingConv::Fast
)
5682 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
5684 assert(!(CallConv
== CallingConv::Fast
&& isVarArg
) &&
5685 "fastcc not supported on varargs functions");
5687 // Count how many bytes are to be pushed on the stack, including the linkage
5688 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5689 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5690 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5691 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
5692 unsigned NumBytes
= LinkageSize
;
5693 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
5694 unsigned &QFPR_idx
= FPR_idx
;
5696 static const MCPhysReg GPR
[] = {
5697 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
5698 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
5700 static const MCPhysReg VR
[] = {
5701 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
5702 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
5705 const unsigned NumGPRs
= array_lengthof(GPR
);
5706 const unsigned NumFPRs
= useSoftFloat() ? 0 : 13;
5707 const unsigned NumVRs
= array_lengthof(VR
);
5708 const unsigned NumQFPRs
= NumFPRs
;
5710 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5711 // can be passed to the callee in registers.
5712 // For the fast calling convention, there is another check below.
5713 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5714 bool HasParameterArea
= !isELFv2ABI
|| isVarArg
|| CallConv
== CallingConv::Fast
;
5715 if (!HasParameterArea
) {
5716 unsigned ParamAreaSize
= NumGPRs
* PtrByteSize
;
5717 unsigned AvailableFPRs
= NumFPRs
;
5718 unsigned AvailableVRs
= NumVRs
;
5719 unsigned NumBytesTmp
= NumBytes
;
5720 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5721 if (Outs
[i
].Flags
.isNest()) continue;
5722 if (CalculateStackSlotUsed(Outs
[i
].VT
, Outs
[i
].ArgVT
, Outs
[i
].Flags
,
5723 PtrByteSize
, LinkageSize
, ParamAreaSize
,
5724 NumBytesTmp
, AvailableFPRs
, AvailableVRs
,
5725 Subtarget
.hasQPX()))
5726 HasParameterArea
= true;
5730 // When using the fast calling convention, we don't provide backing for
5731 // arguments that will be in registers.
5732 unsigned NumGPRsUsed
= 0, NumFPRsUsed
= 0, NumVRsUsed
= 0;
5734 // Avoid allocating parameter area for fastcc functions if all the arguments
5735 // can be passed in the registers.
5736 if (CallConv
== CallingConv::Fast
)
5737 HasParameterArea
= false;
5739 // Add up all the space actually used.
5740 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5741 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5742 EVT ArgVT
= Outs
[i
].VT
;
5743 EVT OrigVT
= Outs
[i
].ArgVT
;
5748 if (CallConv
== CallingConv::Fast
) {
5749 if (Flags
.isByVal()) {
5750 NumGPRsUsed
+= (Flags
.getByValSize()+7)/8;
5751 if (NumGPRsUsed
> NumGPRs
)
5752 HasParameterArea
= true;
5754 switch (ArgVT
.getSimpleVT().SimpleTy
) {
5755 default: llvm_unreachable("Unexpected ValueType for argument!");
5759 if (++NumGPRsUsed
<= NumGPRs
)
5769 if (++NumVRsUsed
<= NumVRs
)
5773 // When using QPX, this is handled like a FP register, otherwise, it
5774 // is an Altivec register.
5775 if (Subtarget
.hasQPX()) {
5776 if (++NumFPRsUsed
<= NumFPRs
)
5779 if (++NumVRsUsed
<= NumVRs
)
5785 case MVT::v4f64
: // QPX
5786 case MVT::v4i1
: // QPX
5787 if (++NumFPRsUsed
<= NumFPRs
)
5791 HasParameterArea
= true;
5795 /* Respect alignment of argument on the stack. */
5797 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5798 NumBytes
= ((NumBytes
+ Align
- 1) / Align
) * Align
;
5800 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
5801 if (Flags
.isInConsecutiveRegsLast())
5802 NumBytes
= ((NumBytes
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
5805 unsigned NumBytesActuallyUsed
= NumBytes
;
5807 // In the old ELFv1 ABI,
5808 // the prolog code of the callee may store up to 8 GPR argument registers to
5809 // the stack, allowing va_start to index over them in memory if it is varargs.
5810 // Because we cannot tell if this is needed on the caller side, we have to
5811 // conservatively assume that it is needed. As such, make sure we have at
5812 // least enough stack space for the caller to store the 8 GPRs.
5813 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5814 // really requires memory operands, e.g. a vararg function.
5815 if (HasParameterArea
)
5816 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
5818 NumBytes
= LinkageSize
;
5820 // Tail call needs the stack to be aligned.
5821 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5822 CallConv
== CallingConv::Fast
)
5823 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
5827 // Calculate by how many bytes the stack has to be adjusted in case of tail
5828 // call optimization.
5830 SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
5832 // To protect arguments on the stack from being clobbered in a tail call,
5833 // force all the loads to happen before doing any other lowering.
5835 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
5837 // Adjust the stack pointer for the new arguments...
5838 // These operations are automatically eliminated by the prolog/epilog pass
5840 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
5841 SDValue CallSeqStart
= Chain
;
5843 // Load the return address and frame pointer so it can be moved somewhere else
5846 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
5848 // Set up a copy of the stack pointer for use loading and storing any
5849 // arguments that may not fit in the registers available for argument
5851 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
5853 // Figure out which arguments are going to go in registers, and which in
5854 // memory. Also, if this is a vararg function, floating point operations
5855 // must be stored to our stack, and loaded into integer regs as well, if
5856 // any integer regs are available for argument passing.
5857 unsigned ArgOffset
= LinkageSize
;
5859 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
5860 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
5862 SmallVector
<SDValue
, 8> MemOpChains
;
5863 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5864 SDValue Arg
= OutVals
[i
];
5865 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5866 EVT ArgVT
= Outs
[i
].VT
;
5867 EVT OrigVT
= Outs
[i
].ArgVT
;
5869 // PtrOff will be used to store the current argument to the stack if a
5870 // register cannot be found for it.
5873 // We re-align the argument offset for each argument, except when using the
5874 // fast calling convention, when we need to make sure we do that only when
5875 // we'll actually use a stack slot.
5876 auto ComputePtrOff
= [&]() {
5877 /* Respect alignment of argument on the stack. */
5879 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5880 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
5882 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
5884 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
5887 if (CallConv
!= CallingConv::Fast
) {
5890 /* Compute GPR index associated with argument offset. */
5891 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
5892 GPR_idx
= std::min(GPR_idx
, NumGPRs
);
5895 // Promote integers to 64-bit values.
5896 if (Arg
.getValueType() == MVT::i32
|| Arg
.getValueType() == MVT::i1
) {
5897 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5898 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
5899 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
5902 // FIXME memcpy is used way more than necessary. Correctness first.
5903 // Note: "by value" is code for passing a structure by value, not
5905 if (Flags
.isByVal()) {
5906 // Note: Size includes alignment padding, so
5907 // struct x { short a; char b; }
5908 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5909 // These are the proper values we need for right-justifying the
5910 // aggregate in a parameter register.
5911 unsigned Size
= Flags
.getByValSize();
5913 // An empty aggregate parameter takes up no storage and no
5918 if (CallConv
== CallingConv::Fast
)
5921 // All aggregates smaller than 8 bytes must be passed right-justified.
5922 if (Size
==1 || Size
==2 || Size
==4) {
5923 EVT VT
= (Size
==1) ? MVT::i8
: ((Size
==2) ? MVT::i16
: MVT::i32
);
5924 if (GPR_idx
!= NumGPRs
) {
5925 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
5926 MachinePointerInfo(), VT
);
5927 MemOpChains
.push_back(Load
.getValue(1));
5928 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
5930 ArgOffset
+= PtrByteSize
;
5935 if (GPR_idx
== NumGPRs
&& Size
< 8) {
5936 SDValue AddPtr
= PtrOff
;
5937 if (!isLittleEndian
) {
5938 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
5939 PtrOff
.getValueType());
5940 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
5942 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
5945 ArgOffset
+= PtrByteSize
;
5948 // Copy entire object into memory. There are cases where gcc-generated
5949 // code assumes it is there, even if it could be put entirely into
5950 // registers. (This is not what the doc says.)
5952 // FIXME: The above statement is likely due to a misunderstanding of the
5953 // documents. All arguments must be copied into the parameter area BY
5954 // THE CALLEE in the event that the callee takes the address of any
5955 // formal argument. That has not yet been implemented. However, it is
5956 // reasonable to use the stack area as a staging area for the register
5959 // Skip this for small aggregates, as we will use the same slot for a
5960 // right-justified copy, below.
5962 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
5966 // When a register is available, pass a small aggregate right-justified.
5967 if (Size
< 8 && GPR_idx
!= NumGPRs
) {
5968 // The easiest way to get this right-justified in a register
5969 // is to copy the structure into the rightmost portion of a
5970 // local variable slot, then load the whole slot into the
5972 // FIXME: The memcpy seems to produce pretty awful code for
5973 // small aggregates, particularly for packed ones.
5974 // FIXME: It would be preferable to use the slot in the
5975 // parameter save area instead of a new local variable.
5976 SDValue AddPtr
= PtrOff
;
5977 if (!isLittleEndian
) {
5978 SDValue Const
= DAG
.getConstant(8 - Size
, dl
, PtrOff
.getValueType());
5979 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
5981 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
5985 // Load the slot into the register.
5987 DAG
.getLoad(PtrVT
, dl
, Chain
, PtrOff
, MachinePointerInfo());
5988 MemOpChains
.push_back(Load
.getValue(1));
5989 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
5991 // Done with this argument.
5992 ArgOffset
+= PtrByteSize
;
5996 // For aggregates larger than PtrByteSize, copy the pieces of the
5997 // object that fit into registers from the parameter save area.
5998 for (unsigned j
=0; j
<Size
; j
+=PtrByteSize
) {
5999 SDValue Const
= DAG
.getConstant(j
, dl
, PtrOff
.getValueType());
6000 SDValue AddArg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Arg
, Const
);
6001 if (GPR_idx
!= NumGPRs
) {
6003 DAG
.getLoad(PtrVT
, dl
, Chain
, AddArg
, MachinePointerInfo());
6004 MemOpChains
.push_back(Load
.getValue(1));
6005 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6006 ArgOffset
+= PtrByteSize
;
6008 ArgOffset
+= ((Size
- j
+ PtrByteSize
-1)/PtrByteSize
)*PtrByteSize
;
6015 switch (Arg
.getSimpleValueType().SimpleTy
) {
6016 default: llvm_unreachable("Unexpected ValueType for argument!");
6020 if (Flags
.isNest()) {
6021 // The 'nest' parameter, if any, is passed in R11.
6022 RegsToPass
.push_back(std::make_pair(PPC::X11
, Arg
));
6027 // These can be scalar arguments or elements of an integer array type
6028 // passed directly. Clang may use those instead of "byval" aggregate
6029 // types to avoid forcing arguments to memory unnecessarily.
6030 if (GPR_idx
!= NumGPRs
) {
6031 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6033 if (CallConv
== CallingConv::Fast
)
6036 assert(HasParameterArea
&&
6037 "Parameter area must exist to pass an argument in memory.");
6038 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6039 true, isTailCall
, false, MemOpChains
,
6040 TailCallArguments
, dl
);
6041 if (CallConv
== CallingConv::Fast
)
6042 ArgOffset
+= PtrByteSize
;
6044 if (CallConv
!= CallingConv::Fast
)
6045 ArgOffset
+= PtrByteSize
;
6049 // These can be scalar arguments or elements of a float array type
6050 // passed directly. The latter are used to implement ELFv2 homogeneous
6051 // float aggregates.
6053 // Named arguments go into FPRs first, and once they overflow, the
6054 // remaining arguments go into GPRs and then the parameter save area.
6055 // Unnamed arguments for vararg functions always go to GPRs and
6056 // then the parameter save area. For now, put all arguments to vararg
6057 // routines always in both locations (FPR *and* GPR or stack slot).
6058 bool NeedGPROrStack
= isVarArg
|| FPR_idx
== NumFPRs
;
6059 bool NeededLoad
= false;
6061 // First load the argument into the next available FPR.
6062 if (FPR_idx
!= NumFPRs
)
6063 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6065 // Next, load the argument into GPR or stack slot if needed.
6066 if (!NeedGPROrStack
)
6068 else if (GPR_idx
!= NumGPRs
&& CallConv
!= CallingConv::Fast
) {
6069 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6070 // once we support fp <-> gpr moves.
6072 // In the non-vararg case, this can only ever happen in the
6073 // presence of f32 array types, since otherwise we never run
6074 // out of FPRs before running out of GPRs.
6077 // Double values are always passed in a single GPR.
6078 if (Arg
.getValueType() != MVT::f32
) {
6079 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i64
, Arg
);
6081 // Non-array float values are extended and passed in a GPR.
6082 } else if (!Flags
.isInConsecutiveRegs()) {
6083 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6084 ArgVal
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i64
, ArgVal
);
6086 // If we have an array of floats, we collect every odd element
6087 // together with its predecessor into one GPR.
6088 } else if (ArgOffset
% PtrByteSize
!= 0) {
6090 Lo
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, OutVals
[i
- 1]);
6091 Hi
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6092 if (!isLittleEndian
)
6094 ArgVal
= DAG
.getNode(ISD::BUILD_PAIR
, dl
, MVT::i64
, Lo
, Hi
);
6096 // The final element, if even, goes into the first half of a GPR.
6097 } else if (Flags
.isInConsecutiveRegsLast()) {
6098 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6099 ArgVal
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i64
, ArgVal
);
6100 if (!isLittleEndian
)
6101 ArgVal
= DAG
.getNode(ISD::SHL
, dl
, MVT::i64
, ArgVal
,
6102 DAG
.getConstant(32, dl
, MVT::i32
));
6104 // Non-final even elements are skipped; they will be handled
6105 // together with the subsequent argument on the next go-around.
6109 if (ArgVal
.getNode())
6110 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], ArgVal
));
6112 if (CallConv
== CallingConv::Fast
)
6115 // Single-precision floating-point values are mapped to the
6116 // second (rightmost) word of the stack doubleword.
6117 if (Arg
.getValueType() == MVT::f32
&&
6118 !isLittleEndian
&& !Flags
.isInConsecutiveRegs()) {
6119 SDValue ConstFour
= DAG
.getConstant(4, dl
, PtrOff
.getValueType());
6120 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, ConstFour
);
6123 assert(HasParameterArea
&&
6124 "Parameter area must exist to pass an argument in memory.");
6125 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6126 true, isTailCall
, false, MemOpChains
,
6127 TailCallArguments
, dl
);
6131 // When passing an array of floats, the array occupies consecutive
6132 // space in the argument area; only round up to the next doubleword
6133 // at the end of the array. Otherwise, each float takes 8 bytes.
6134 if (CallConv
!= CallingConv::Fast
|| NeededLoad
) {
6135 ArgOffset
+= (Arg
.getValueType() == MVT::f32
&&
6136 Flags
.isInConsecutiveRegs()) ? 4 : 8;
6137 if (Flags
.isInConsecutiveRegsLast())
6138 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
6150 if (!Subtarget
.hasQPX()) {
6151 // These can be scalar arguments or elements of a vector array type
6152 // passed directly. The latter are used to implement ELFv2 homogeneous
6153 // vector aggregates.
6155 // For a varargs call, named arguments go into VRs or on the stack as
6156 // usual; unnamed arguments always go to the stack or the corresponding
6157 // GPRs when within range. For now, we always put the value in both
6158 // locations (or even all three).
6160 assert(HasParameterArea
&&
6161 "Parameter area must exist if we have a varargs call.");
6162 // We could elide this store in the case where the object fits
6163 // entirely in R registers. Maybe later.
6165 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6166 MemOpChains
.push_back(Store
);
6167 if (VR_idx
!= NumVRs
) {
6169 DAG
.getLoad(MVT::v4f32
, dl
, Store
, PtrOff
, MachinePointerInfo());
6170 MemOpChains
.push_back(Load
.getValue(1));
6171 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Load
));
6174 for (unsigned i
=0; i
<16; i
+=PtrByteSize
) {
6175 if (GPR_idx
== NumGPRs
)
6177 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6178 DAG
.getConstant(i
, dl
, PtrVT
));
6180 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6181 MemOpChains
.push_back(Load
.getValue(1));
6182 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6187 // Non-varargs Altivec params go into VRs or on the stack.
6188 if (VR_idx
!= NumVRs
) {
6189 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Arg
));
6191 if (CallConv
== CallingConv::Fast
)
6194 assert(HasParameterArea
&&
6195 "Parameter area must exist to pass an argument in memory.");
6196 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6197 true, isTailCall
, true, MemOpChains
,
6198 TailCallArguments
, dl
);
6199 if (CallConv
== CallingConv::Fast
)
6203 if (CallConv
!= CallingConv::Fast
)
6208 assert(Arg
.getValueType().getSimpleVT().SimpleTy
== MVT::v4f32
&&
6209 "Invalid QPX parameter type");
6214 bool IsF32
= Arg
.getValueType().getSimpleVT().SimpleTy
== MVT::v4f32
;
6216 assert(HasParameterArea
&&
6217 "Parameter area must exist if we have a varargs call.");
6218 // We could elide this store in the case where the object fits
6219 // entirely in R registers. Maybe later.
6221 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6222 MemOpChains
.push_back(Store
);
6223 if (QFPR_idx
!= NumQFPRs
) {
6224 SDValue Load
= DAG
.getLoad(IsF32
? MVT::v4f32
: MVT::v4f64
, dl
, Store
,
6225 PtrOff
, MachinePointerInfo());
6226 MemOpChains
.push_back(Load
.getValue(1));
6227 RegsToPass
.push_back(std::make_pair(QFPR
[QFPR_idx
++], Load
));
6229 ArgOffset
+= (IsF32
? 16 : 32);
6230 for (unsigned i
= 0; i
< (IsF32
? 16U : 32U); i
+= PtrByteSize
) {
6231 if (GPR_idx
== NumGPRs
)
6233 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6234 DAG
.getConstant(i
, dl
, PtrVT
));
6236 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6237 MemOpChains
.push_back(Load
.getValue(1));
6238 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6243 // Non-varargs QPX params go into registers or on the stack.
6244 if (QFPR_idx
!= NumQFPRs
) {
6245 RegsToPass
.push_back(std::make_pair(QFPR
[QFPR_idx
++], Arg
));
6247 if (CallConv
== CallingConv::Fast
)
6250 assert(HasParameterArea
&&
6251 "Parameter area must exist to pass an argument in memory.");
6252 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6253 true, isTailCall
, true, MemOpChains
,
6254 TailCallArguments
, dl
);
6255 if (CallConv
== CallingConv::Fast
)
6256 ArgOffset
+= (IsF32
? 16 : 32);
6259 if (CallConv
!= CallingConv::Fast
)
6260 ArgOffset
+= (IsF32
? 16 : 32);
6266 assert((!HasParameterArea
|| NumBytesActuallyUsed
== ArgOffset
) &&
6267 "mismatch in size of parameter area");
6268 (void)NumBytesActuallyUsed
;
6270 if (!MemOpChains
.empty())
6271 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
6273 // Check if this is an indirect call (MTCTR/BCTRL).
6274 // See PrepareCall() for more information about calls through function
6275 // pointers in the 64-bit SVR4 ABI.
6276 if (!isTailCall
&& !isPatchPoint
&&
6277 !isFunctionGlobalAddress(Callee
) &&
6278 !isa
<ExternalSymbolSDNode
>(Callee
)) {
6279 // Load r2 into a virtual register and store it to the TOC save area.
6280 setUsesTOCBasePtr(DAG
);
6281 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, PPC::X2
, MVT::i64
);
6282 // TOC save area offset.
6283 unsigned TOCSaveOffset
= Subtarget
.getFrameLowering()->getTOCSaveOffset();
6284 SDValue PtrOff
= DAG
.getIntPtrConstant(TOCSaveOffset
, dl
);
6285 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
6286 Chain
= DAG
.getStore(
6287 Val
.getValue(1), dl
, Val
, AddPtr
,
6288 MachinePointerInfo::getStack(DAG
.getMachineFunction(), TOCSaveOffset
));
6289 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6290 // This does not mean the MTCTR instruction must use R12; it's easier
6291 // to model this as an extra parameter, so do that.
6292 if (isELFv2ABI
&& !isPatchPoint
)
6293 RegsToPass
.push_back(std::make_pair((unsigned)PPC::X12
, Callee
));
6296 // Build a sequence of copy-to-reg nodes chained together with token chain
6297 // and flag operands which copy the outgoing args into the appropriate regs.
6299 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
6300 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
6301 RegsToPass
[i
].second
, InFlag
);
6302 InFlag
= Chain
.getValue(1);
6305 if (isTailCall
&& !IsSibCall
)
6306 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
6309 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
, hasNest
,
6310 DAG
, RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
,
6311 SPDiff
, NumBytes
, Ins
, InVals
, CS
);
/// Lowers an outgoing call using the Darwin PowerPC conventions; the pointer
/// type decides between the 32-bit and 64-bit register sets and slot size.
///
/// As visible below, the routine
///  1. sizes the outgoing argument area starting from the linkage area,
///     padding or deferring Altivec vector arguments and reserving at least
///     room for 8 GPRs;
///  2. opens the call sequence with CALLSEQ_START;
///  3. walks the operands, assigning each one to a GPR, FPR or VR and/or a
///     stack slot, recording register copies in RegsToPass and memory
///     operations in MemOpChains;
///  4. may add the callee address as an extra X12/R12 register argument; and
///  5. emits the CopyToReg chain and returns the result of FinishCall.
///
/// NOTE(review): this listing appears to have lost a number of short lines
/// (closing braces, `case` labels, and declarations such as PtrOff, InFlag,
/// LROp, FPOp and FPR) -- confirm against the upstream file before changing
/// any logic here.
6314 SDValue
PPCTargetLowering::LowerCall_Darwin(
6315 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
6316 bool isTailCall
, bool isPatchPoint
,
6317 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6318 const SmallVectorImpl
<SDValue
> &OutVals
,
6319 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
6320 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
6321 ImmutableCallSite CS
) const {
6322 unsigned NumOps
= Outs
.size();
// A 64-bit pointer type selects 8-byte slots; otherwise slots are 4 bytes.
6324 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6325 bool isPPC64
= PtrVT
== MVT::i64
;
6326 unsigned PtrByteSize
= isPPC64
? 8 : 4;
6328 MachineFunction
&MF
= DAG
.getMachineFunction();
6330 // Mark this function as potentially containing a function that contains a
6331 // tail call. As a consequence the frame pointer will be used for dynamic allocation
6332 // and restoring the caller's stack pointer in this function's epilog. This is
6333 // done because by tail calling the called function might overwrite the value
6334 // in this function's (MF) stack pointer stack slot 0(SP).
6335 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
6336 CallConv
== CallingConv::Fast
)
6337 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
6339 // Count how many bytes are to be pushed on the stack, including the linkage
6340 // area, and parameter passing area. We start with 24/48 bytes, which is
6341 // prereserved space for [SP][CR][LR][3 x unused].
6342 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
6343 unsigned NumBytes
= LinkageSize
;
6345 // Add up all the space actually used.
6346 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6347 // they all go in registers, but we must reserve stack space for them for
6348 // possible use by the caller. In varargs or 64-bit calls, parameters are
6349 // assigned stack space in order, with padding so Altivec parameters are
6351 unsigned nAltivecParamsAtEnd
= 0;
6352 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6353 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6354 EVT ArgVT
= Outs
[i
].VT
;
6355 // Varargs Altivec parameters are padded to a 16 byte boundary.
6356 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
6357 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
6358 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
) {
6359 if (!isVarArg
&& !isPPC64
) {
6360 // Non-varargs Altivec parameters go after all the non-Altivec
6361 // parameters; handle those later so we know how much padding we need.
6362 nAltivecParamsAtEnd
++;
6365 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6366 NumBytes
= ((NumBytes
+15)/16)*16;
6368 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
6371 // Allow for Altivec parameters at the end, if needed.
6372 if (nAltivecParamsAtEnd
) {
6373 NumBytes
= ((NumBytes
+15)/16)*16;
6374 NumBytes
+= 16*nAltivecParamsAtEnd
;
6377 // The prolog code of the callee may store up to 8 GPR argument registers to
6378 // the stack, allowing va_start to index over them in memory if it's varargs.
6379 // Because we cannot tell if this is needed on the caller side, we have to
6380 // conservatively assume that it is needed. As such, make sure we have at
6381 // least enough stack space for the caller to store the 8 GPRs.
6382 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
6384 // Tail call needs the stack to be aligned.
6385 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
6386 CallConv
== CallingConv::Fast
)
6387 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
6389 // Calculate by how many bytes the stack has to be adjusted in case of tail
6390 // call optimization.
6391 int SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
6393 // To protect arguments on the stack from being clobbered in a tail call,
6394 // force all the loads to happen before doing any other lowering.
6396 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
6398 // Adjust the stack pointer for the new arguments...
6399 // These operations are automatically eliminated by the prolog/epilog pass
6400 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
6401 SDValue CallSeqStart
= Chain
;
6403 // Load the return address and frame pointer so it can be moved somewhere else
6406 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
6408 // Set up a copy of the stack pointer for use loading and storing any
6409 // arguments that may not fit in the registers available for argument
// NOTE(review): the two assignments below presumably sit in the two arms of
// an isPPC64 test (X1 for 64-bit, R1 for 32-bit); the test and the StackPtr
// declaration are not visible in this listing -- confirm.
6413 StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
6415 StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
6417 // Figure out which arguments are going to go in registers, and which in
6418 // memory. Also, if this is a vararg function, floating point operations
6419 // must be stored to our stack, and loaded into integer regs as well, if
6420 // any integer regs are available for argument passing.
6421 unsigned ArgOffset
= LinkageSize
;
6422 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
6424 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
6425 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
6426 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
6428 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
6429 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
6430 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
6432 static const MCPhysReg VR
[] = {
6433 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
6434 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
6436 const unsigned NumGPRs
= array_lengthof(GPR_32
);
6437 const unsigned NumFPRs
= 13;
6438 const unsigned NumVRs
= array_lengthof(VR
);
6440 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
6442 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
6443 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
6445 SmallVector
<SDValue
, 8> MemOpChains
;
// Walk every outgoing operand: Arg is its value, Flags its argument flags.
6446 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6447 SDValue Arg
= OutVals
[i
];
6448 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6450 // PtrOff will be used to store the current argument to the stack if a
6451 // register cannot be found for it.
6454 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
6456 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
6458 // On PPC64, promote integers to 64-bit values.
6459 if (isPPC64
&& Arg
.getValueType() == MVT::i32
) {
6460 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6461 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
6462 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
6465 // FIXME memcpy is used way more than necessary. Correctness first.
6466 // Note: "by value" is code for passing a structure by value, not
6468 if (Flags
.isByVal()) {
6469 unsigned Size
= Flags
.getByValSize();
6470 // Very small objects are passed right-justified. Everything else is
6471 // passed left-justified.
6472 if (Size
==1 || Size
==2) {
6473 EVT VT
= (Size
==1) ? MVT::i8
: MVT::i16
;
6474 if (GPR_idx
!= NumGPRs
) {
6475 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
6476 MachinePointerInfo(), VT
);
6477 MemOpChains
.push_back(Load
.getValue(1));
6478 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6480 ArgOffset
+= PtrByteSize
;
6482 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
6483 PtrOff
.getValueType());
6484 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
6485 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
6488 ArgOffset
+= PtrByteSize
;
6492 // Copy entire object into memory. There are cases where gcc-generated
6493 // code assumes it is there, even if it could be put entirely into
6494 // registers. (This is not what the doc says.)
6495 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
6499 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6500 // copy the pieces of the object that fit into registers from the
6501 // parameter save area.
6502 for (unsigned j
=0; j
<Size
; j
+=PtrByteSize
) {
6503 SDValue Const
= DAG
.getConstant(j
, dl
, PtrOff
.getValueType());
6504 SDValue AddArg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Arg
, Const
);
6505 if (GPR_idx
!= NumGPRs
) {
6507 DAG
.getLoad(PtrVT
, dl
, Chain
, AddArg
, MachinePointerInfo());
6508 MemOpChains
.push_back(Load
.getValue(1));
6509 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6510 ArgOffset
+= PtrByteSize
;
6512 ArgOffset
+= ((Size
- j
+ PtrByteSize
-1)/PtrByteSize
)*PtrByteSize
;
// Non-byval operands are dispatched on their simple value type.
// NOTE(review): the `case` labels of this switch are not visible here.
6519 switch (Arg
.getSimpleValueType().SimpleTy
) {
6520 default: llvm_unreachable("Unexpected ValueType for argument!");
// Use the next free GPR if one remains (an i1 value is first zero-extended
// to pointer width); the LowerMemOpCallTo call below handles the stack path.
6524 if (GPR_idx
!= NumGPRs
) {
6525 if (Arg
.getValueType() == MVT::i1
)
6526 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, PtrVT
, Arg
);
6528 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6530 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6531 isPPC64
, isTailCall
, false, MemOpChains
,
6532 TailCallArguments
, dl
);
6534 ArgOffset
+= PtrByteSize
;
// Use the next free FPR if one remains.
6538 if (FPR_idx
!= NumFPRs
) {
6539 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6543 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6544 MemOpChains
.push_back(Store
);
6546 // Float varargs are always shadowed in available integer registers
6547 if (GPR_idx
!= NumGPRs
) {
6549 DAG
.getLoad(PtrVT
, dl
, Store
, PtrOff
, MachinePointerInfo());
6550 MemOpChains
.push_back(Load
.getValue(1));
6551 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6553 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&& !isPPC64
){
6554 SDValue ConstFour
= DAG
.getConstant(4, dl
, PtrOff
.getValueType());
6555 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, ConstFour
);
6557 DAG
.getLoad(PtrVT
, dl
, Store
, PtrOff
, MachinePointerInfo());
6558 MemOpChains
.push_back(Load
.getValue(1));
6559 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6562 // If we have any FPRs remaining, we may also have GPRs remaining.
6563 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6565 if (GPR_idx
!= NumGPRs
)
6567 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&&
6568 !isPPC64
) // PPC64 has 64-bit GPR's obviously :)
6572 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6573 isPPC64
, isTailCall
, false, MemOpChains
,
6574 TailCallArguments
, dl
);
6578 ArgOffset
+= Arg
.getValueType() == MVT::f32
? 4 : 8;
6585 // These go aligned on the stack, or in the corresponding R registers
6586 // when within range. The Darwin PPC ABI doc claims they also go in
6587 // V registers; in fact gcc does this only for arguments that are
6588 // prototyped, not for those that match the ... We do it for all
6589 // arguments, seems to work.
6590 while (ArgOffset
% 16 !=0) {
6591 ArgOffset
+= PtrByteSize
;
6592 if (GPR_idx
!= NumGPRs
)
6595 // We could elide this store in the case where the object fits
6596 // entirely in R registers. Maybe later.
6597 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
,
6598 DAG
.getConstant(ArgOffset
, dl
, PtrVT
));
6600 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6601 MemOpChains
.push_back(Store
);
6602 if (VR_idx
!= NumVRs
) {
6604 DAG
.getLoad(MVT::v4f32
, dl
, Store
, PtrOff
, MachinePointerInfo());
6605 MemOpChains
.push_back(Load
.getValue(1));
6606 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Load
));
6609 for (unsigned i
=0; i
<16; i
+=PtrByteSize
) {
6610 if (GPR_idx
== NumGPRs
)
6612 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6613 DAG
.getConstant(i
, dl
, PtrVT
));
6615 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6616 MemOpChains
.push_back(Load
.getValue(1));
6617 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6622 // Non-varargs Altivec params generally go in registers, but have
6623 // stack space allocated at the end.
6624 if (VR_idx
!= NumVRs
) {
6625 // Doesn't have GPR space allocated.
6626 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Arg
));
6627 } else if (nAltivecParamsAtEnd
==0) {
6628 // We are emitting Altivec params in order.
6629 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6630 isPPC64
, isTailCall
, true, MemOpChains
,
6631 TailCallArguments
, dl
);
6637 // If all Altivec parameters fit in registers, as they usually do,
6638 // they get stack space following the non-Altivec parameters. We
6639 // don't track this here because nobody below needs it.
6640 // If there are more Altivec parameters than fit in registers emit
6642 if (!isVarArg
&& nAltivecParamsAtEnd
> NumVRs
) {
6644 // Offset is aligned; skip 1st 12 params which go in V registers.
6645 ArgOffset
= ((ArgOffset
+15)/16)*16;
// Second pass over the operands, looking only at the four vector types
// tested below.
6647 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6648 SDValue Arg
= OutVals
[i
];
6649 EVT ArgType
= Outs
[i
].VT
;
6650 if (ArgType
==MVT::v4f32
|| ArgType
==MVT::v4i32
||
6651 ArgType
==MVT::v8i16
|| ArgType
==MVT::v16i8
) {
6654 // We are emitting Altivec params in order.
6655 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6656 isPPC64
, isTailCall
, true, MemOpChains
,
6657 TailCallArguments
, dl
);
// Merge all recorded memory-operation chains into a single token.
6664 if (!MemOpChains
.empty())
6665 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
6667 // On Darwin, R12 must contain the address of an indirect callee. This does
6668 // not mean the MTCTR instruction must use R12; it's easier to model this as
6669 // an extra parameter, so do that.
// NOTE(review): the opening line of the condition below is not visible in
// this listing -- confirm against upstream.
6671 !isFunctionGlobalAddress(Callee
) &&
6672 !isa
<ExternalSymbolSDNode
>(Callee
) &&
6673 !isBLACompatibleAddress(Callee
, DAG
))
6674 RegsToPass
.push_back(std::make_pair((unsigned)(isPPC64
? PPC::X12
:
6675 PPC::R12
), Callee
));
6677 // Build a sequence of copy-to-reg nodes chained together with token chain
6678 // and flag operands which copy the outgoing args into the appropriate regs.
6680 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
6681 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
6682 RegsToPass
[i
].second
, InFlag
);
6683 InFlag
= Chain
.getValue(1);
6687 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
// The call node itself and result handling are delegated to FinishCall.
6690 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
6691 /* unused except on PPC64 ELFv1 */ false, DAG
,
6692 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
6693 NumBytes
, Ins
, InVals
, CS
);
/// Lowers an outgoing call for AIX. Only a subset of calls is implemented.
///
/// As visible below, the routine asserts that the calling convention is C or
/// Fast and reports a fatal error for vararg and patchpoint calls, byval
/// arguments, arguments that would need a stack slot, and callees that are
/// neither a global address nor an external symbol. A fatal-error message
/// for tail calls is also present. Arguments that pass these checks are
/// assigned to GPRs or FPRs, copied with CopyToReg, and the call is
/// completed by FinishCall.
///
/// NOTE(review): this listing appears to have lost a number of short lines
/// (closing braces, `case` labels, `else` keywords, and the declarations of
/// SPDiff and InFlag); FPR is not declared in the visible part of this
/// function -- confirm against the upstream file before changing any logic.
6697 SDValue
PPCTargetLowering::LowerCall_AIX(
6698 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
6699 bool isTailCall
, bool isPatchPoint
,
6700 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6701 const SmallVectorImpl
<SDValue
> &OutVals
,
6702 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
6703 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
6704 ImmutableCallSite CS
) const {
6706 assert((CallConv
== CallingConv::C
|| CallConv
== CallingConv::Fast
) &&
6707 "Unimplemented calling convention!");
6708 if (isVarArg
|| isPatchPoint
)
6709 report_fatal_error("This call type is unimplemented on AIX.");
// A 64-bit pointer type selects 8-byte slots; otherwise slots are 4 bytes.
6711 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6712 bool isPPC64
= PtrVT
== MVT::i64
;
6713 unsigned PtrByteSize
= isPPC64
? 8 : 4;
6714 unsigned NumOps
= Outs
.size();
6717 // Count how many bytes are to be pushed on the stack, including the linkage
6718 // area, parameter list area.
6719 // On XCOFF, we start with 24/48, which is reserved space for
6720 // [SP][CR][LR][2 x reserved][TOC].
6721 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
6723 // The prolog code of the callee may store up to 8 GPR argument registers to
6724 // the stack, allowing va_start to index over them in memory if the callee
6726 // Because we cannot tell if this is needed on the caller side, we have to
6727 // conservatively assume that it is needed. As such, make sure we have at
6728 // least enough stack space for the caller to store the 8 GPRs.
6729 unsigned NumBytes
= LinkageSize
+ 8 * PtrByteSize
;
6731 // Adjust the stack pointer for the new arguments...
6732 // These operations are automatically eliminated by the prolog/epilog
6734 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
6735 SDValue CallSeqStart
= Chain
;
6737 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
6738 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
6739 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
6741 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
6742 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
6743 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
6746 const unsigned NumGPRs
= isPPC64
? array_lengthof(GPR_64
)
6747 : array_lengthof(GPR_32
);
6748 const unsigned NumFPRs
= array_lengthof(FPR
);
6749 assert(NumFPRs
== 13 && "Only FPR 1-13 could be used for parameter passing "
6752 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
6753 unsigned GPR_idx
= 0, FPR_idx
= 0;
6755 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
// NOTE(review): presumably guarded by a test of isTailCall that is not
// visible in this listing -- confirm.
6758 report_fatal_error("Handling of tail call is unimplemented!");
// Assign each outgoing operand to a register; the visible failure paths all
// end in report_fatal_error.
6761 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6762 SDValue Arg
= OutVals
[i
];
6763 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6765 // Promote integers if needed.
6766 if (Arg
.getValueType() == MVT::i1
||
6767 (isPPC64
&& Arg
.getValueType() == MVT::i32
)) {
6768 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
6769 Arg
= DAG
.getNode(ExtOp
, dl
, PtrVT
, Arg
);
6772 // Note: "by value" is code for passing a structure by value, not
6774 if (Flags
.isByVal())
6775 report_fatal_error("Passing structure by value is unimplemented!");
// NOTE(review): the `case` labels of this switch are not visible here.
6777 switch (Arg
.getSimpleValueType().SimpleTy
) {
6778 default: llvm_unreachable("Unexpected ValueType for argument!");
// Use the next free GPR if one remains.
6782 if (GPR_idx
!= NumGPRs
)
6783 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6785 report_fatal_error("Handling of placing parameters on the stack is "
// Use the next free FPR if one remains.
6790 if (FPR_idx
!= NumFPRs
) {
6791 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6793 // If we have any FPRs remaining, we may also have GPRs remaining.
6794 // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
6796 if (GPR_idx
!= NumGPRs
)
6798 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&& !isPPC64
)
6801 report_fatal_error("Handling of placing parameters on the stack is "
6814 report_fatal_error("Handling of this parameter type is unimplemented!");
// Only callees that are a global address or an external symbol are accepted.
6818 if (!isFunctionGlobalAddress(Callee
) &&
6819 !isa
<ExternalSymbolSDNode
>(Callee
))
6820 report_fatal_error("Handling of indirect call is unimplemented!");
6822 // Build a sequence of copy-to-reg nodes chained together with token chain
6823 // and flag operands which copy the outgoing args into the appropriate regs.
6825 for (auto Reg
: RegsToPass
) {
6826 Chain
= DAG
.getCopyToReg(Chain
, dl
, Reg
.first
, Reg
.second
, InFlag
);
6827 InFlag
= Chain
.getValue(1);
// The call node itself and result handling are delegated to FinishCall.
6830 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
6831 /* unused except on PPC64 ELFv1 */ false, DAG
,
6832 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
6833 NumBytes
, Ins
, InVals
, CS
);
/// Builds a CCState for the given calling convention / vararg-ness and
/// returns the result of CCInfo.CheckReturn() on the return values described
/// by Outs. The second CheckReturn argument is selected by whether the
/// subtarget uses the SVR4 ABI and CallConv is CallingConv::Cold.
// NOTE(review): the return type and the rest of the CheckReturn argument are
// not visible in this listing (embedded line numbers stop at 6844) -- confirm
// against the full source.
6837 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv
,
6838 MachineFunction
&MF
, bool isVarArg
,
6839 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6840 LLVMContext
&Context
) const {
6841 SmallVector
<CCValAssign
, 16> RVLocs
;
6842 CCState
CCInfo(CallConv
, isVarArg
, MF
, RVLocs
, Context
);
6843 return CCInfo
.CheckReturn(
6844 Outs
, (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
/// Lowers a function return.
///
/// Analyzes Outs with a CCState, copies every return value into the register
/// location assigned to it (applying ANY_EXTEND / ZERO_EXTEND / SIGN_EXTEND
/// first when the location info asks for it) and finally emits a
/// PPCISD::RET_FLAG node built from RetOps.
///
/// RetOps starts out holding only the chain; one register operand is appended
/// per copied value, then one per register reported by
/// getCalleeSavedRegsViaCopy(), and Flag is appended last. Slot 0 is
/// overwritten with the final chain before the node is created.
// NOTE(review): this listing has gaps (e.g. no visible declaration of Flag or
// SVal, no `break` after the extend cases, no visible loop around the
// CSRsViaCopy handling) -- confirm the control flow against the full source.
6850 PPCTargetLowering::LowerReturn(SDValue Chain
, CallingConv::ID CallConv
,
6852 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6853 const SmallVectorImpl
<SDValue
> &OutVals
,
6854 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
6855 SmallVector
<CCValAssign
, 16> RVLocs
;
6856 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), RVLocs
,
6858 CCInfo
.AnalyzeReturn(Outs
,
6859 (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
6864 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
6866 // Copy the result values into the output registers.
6867 for (unsigned i
= 0, RealResIdx
= 0; i
!= RVLocs
.size(); ++i
, ++RealResIdx
) {
// i indexes RVLocs while RealResIdx indexes OutVals; the two diverge when the
// SPE f64 path below consumes a second location for a single value.
6868 CCValAssign
&VA
= RVLocs
[i
];
6869 assert(VA
.isRegLoc() && "Can only return in registers!");
6871 SDValue Arg
= OutVals
[RealResIdx
];
6873 switch (VA
.getLocInfo()) {
6874 default: llvm_unreachable("Unknown loc info!");
6875 case CCValAssign::Full
: break;
6876 case CCValAssign::AExt
:
6877 Arg
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6879 case CCValAssign::ZExt
:
6880 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6882 case CCValAssign::SExt
:
6883 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6886 if (Subtarget
.hasSPE() && VA
.getLocVT() == MVT::f64
) {
6887 bool isLittleEndian
= Subtarget
.isLittleEndian();
6888 // Legalize ret f64 -> ret 2 x i32.
// The first half extracted uses index 0 on little endian and 1 on big endian;
// the second half uses the opposite index.
6890 DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
6891 DAG
.getIntPtrConstant(isLittleEndian
? 0 : 1, dl
));
6892 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Flag
);
6893 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
6894 SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
6895 DAG
.getIntPtrConstant(isLittleEndian
? 1 : 0, dl
));
6896 Flag
= Chain
.getValue(1);
6897 VA
= RVLocs
[++i
]; // skip ahead to next loc
6898 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Flag
);
6900 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), Arg
, Flag
);
// Result 1 of the copy node is fed into the next copy as Flag, tying the
// copies together.
6901 Flag
= Chain
.getValue(1);
6902 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
// Append the registers reported by getCalleeSavedRegsViaCopy(), typed by the
// register class each one belongs to.
6905 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
6906 const MCPhysReg
*I
=
6907 TRI
->getCalleeSavedRegsViaCopy(&DAG
.getMachineFunction());
6911 if (PPC::G8RCRegClass
.contains(*I
))
6912 RetOps
.push_back(DAG
.getRegister(*I
, MVT::i64
));
6913 else if (PPC::F8RCRegClass
.contains(*I
))
6914 RetOps
.push_back(DAG
.getRegister(*I
, MVT::getFloatingPointVT(64)));
6915 else if (PPC::CRRCRegClass
.contains(*I
))
6916 RetOps
.push_back(DAG
.getRegister(*I
, MVT::i1
));
6917 else if (PPC::VRRCRegClass
.contains(*I
))
6918 RetOps
.push_back(DAG
.getRegister(*I
, MVT::Other
));
6920 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6924 RetOps
[0] = Chain
; // Update chain.
6926 // Add the flag if we have it.
6928 RetOps
.push_back(Flag
);
6930 return DAG
.getNode(PPCISD::RET_FLAG
, dl
, MVT::Other
, RetOps
);
/// Builds a PPCISD::DYNAREAOFFSET node. Its two operands are the incoming
/// chain (operand 0 of Op) and the frame index returned by
/// getFramePointerFrameIndex(); its single result has Op's value type.
// NOTE(review): `dl` has no visible declaration in this listing -- confirm
// against the full source.
6934 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op
,
6935 SelectionDAG
&DAG
) const {
6938 // Get the correct type for integers.
6939 EVT IntVT
= Op
.getValueType();
6942 SDValue Chain
= Op
.getOperand(0);
6943 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
6944 // Build a DYNAREAOFFSET node.
6945 SDValue Ops
[2] = {Chain
, FPSIdx
};
6946 SDVTList VTs
= DAG
.getVTList(IntVT
);
6947 return DAG
.getNode(PPCISD::DYNAREAOFFSET
, dl
, VTs
, Ops
);
/// Lowers a stack restore in three chained steps:
///   1. load the pointer-sized word currently addressed by the stack pointer,
///   2. copy the saved stack pointer (operand 1 of Op) into the stack pointer
///      register (X1 on PPC64, R1 otherwise),
///   3. store the word loaded in step 1 through the stack pointer again.
/// The store node is returned.
// NOTE(review): `dl` has no visible declaration in this listing -- confirm
// against the full source.
6950 SDValue
PPCTargetLowering::LowerSTACKRESTORE(SDValue Op
,
6951 SelectionDAG
&DAG
) const {
6952 // When we pop the dynamic allocation we need to restore the SP link.
6955 // Get the correct type for pointers.
6956 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6958 // Construct the stack pointer operand.
6959 bool isPPC64
= Subtarget
.isPPC64();
6960 unsigned SP
= isPPC64
? PPC::X1
: PPC::R1
;
6961 SDValue StackPtr
= DAG
.getRegister(SP
, PtrVT
);
6963 // Get the operands for the STACKRESTORE.
6964 SDValue Chain
= Op
.getOperand(0);
6965 SDValue SaveSP
= Op
.getOperand(1);
6967 // Load the old link SP.
6968 SDValue LoadLinkSP
=
6969 DAG
.getLoad(PtrVT
, dl
, Chain
, StackPtr
, MachinePointerInfo());
6971 // Restore the stack pointer.
// The copy is chained on the load's chain result so it is ordered after the
// load of the link word.
6972 Chain
= DAG
.getCopyToReg(LoadLinkSP
.getValue(1), dl
, SP
, SaveSP
);
6974 // Store the old link SP.
6975 return DAG
.getStore(Chain
, dl
, LoadLinkSP
, StackPtr
, MachinePointerInfo());
/// Returns a pointer-typed frame index node for the return address save slot.
/// The index is read from PPCFunctionInfo; the visible code can also create a
/// pointer-sized fixed object at the frame lowering's return save offset and
/// record its index with setReturnAddrSaveIndex().
// NOTE(review): the condition guarding the CreateFixedObject call is not
// visible in this listing -- confirm against the full source.
6978 SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG
&DAG
) const {
6979 MachineFunction
&MF
= DAG
.getMachineFunction();
6980 bool isPPC64
= Subtarget
.isPPC64();
6981 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
6983 // Get the current return address save index recorded in the function
6984 // info.
6985 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
6986 int RASI
= FI
->getReturnAddrSaveIndex();
6988 // If the return address save index hasn't been defined yet.
6990 // Find out the fixed offset of the return address save area.
6991 int LROffset
= Subtarget
.getFrameLowering()->getReturnSaveOffset();
6992 // Allocate the frame index for the return address save area.
6993 RASI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, LROffset
, false);
6995 FI
->setReturnAddrSaveIndex(RASI
);
6997 return DAG
.getFrameIndex(RASI
, PtrVT
);
/// Returns a pointer-typed frame index node for the frame pointer save slot.
/// The index is read from PPCFunctionInfo; the visible code can also create a
/// pointer-sized fixed object at the frame lowering's frame pointer save
/// offset and record its index with setFramePointerSaveIndex().
// NOTE(review): the return type and the condition guarding the
// CreateFixedObject call are not visible in this listing -- confirm against
// the full source.
7001 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG
& DAG
) const {
7002 MachineFunction
&MF
= DAG
.getMachineFunction();
7003 bool isPPC64
= Subtarget
.isPPC64();
7004 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7006 // Get current frame pointer save index. The users of this index will be
7007 // primarily DYNALLOC instructions.
7008 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
7009 int FPSI
= FI
->getFramePointerSaveIndex();
7011 // If the frame pointer save index hasn't been defined yet.
7013 // Find out the fixed offset of the frame pointer save area.
7014 int FPOffset
= Subtarget
.getFrameLowering()->getFramePointerSaveOffset();
7015 // Allocate the frame index for frame pointer save area.
7016 FPSI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, FPOffset
, true);
7018 FI
->setFramePointerSaveIndex(FPSI
);
7020 return DAG
.getFrameIndex(FPSI
, PtrVT
);
/// Lowers a dynamic stack allocation to a PPCISD::DYNALLOC node.
///
/// The node takes three operands -- the chain (operand 0 of Op), the negated
/// allocation size (0 - operand 1 of Op) and the frame pointer save frame
/// index -- and produces a pointer-typed value plus a chain.
// NOTE(review): `dl` has no visible declaration in this listing -- confirm
// against the full source.
7023 SDValue
PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
7024 SelectionDAG
&DAG
) const {
7026 SDValue Chain
= Op
.getOperand(0);
7027 SDValue Size
= Op
.getOperand(1);
7030 // Get the correct type for pointers.
7031 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
// DYNALLOC is handed the negated size, computed as 0 - Size.
7033 SDValue NegSize
= DAG
.getNode(ISD::SUB
, dl
, PtrVT
,
7034 DAG
.getConstant(0, dl
, PtrVT
), Size
);
7035 // Construct a node for the frame pointer save index.
7036 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
7037 // Build a DYNALLOC node.
7038 SDValue Ops
[3] = { Chain
, NegSize
, FPSIdx
};
7039 SDVTList VTs
= DAG
.getVTList(PtrVT
, MVT::Other
);
7040 return DAG
.getNode(PPCISD::DYNALLOC
, dl
, VTs
, Ops
);
/// Creates a pointer-sized fixed frame object (8 bytes on PPC64, 4 otherwise)
/// at offset 0 and returns its frame index as a pointer-typed node. Op is not
/// read by the visible code.
7043 SDValue
PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op
,
7044 SelectionDAG
&DAG
) const {
7045 MachineFunction
&MF
= DAG
.getMachineFunction();
7047 bool isPPC64
= Subtarget
.isPPC64();
7048 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7050 int FI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, 0, false);
7051 return DAG
.getFrameIndex(FI
, PtrVT
);
/// Re-emits Op as a PPCISD::EH_SJLJ_SETJMP node that takes Op's operands 0 and
/// 1 and produces an i32 value plus a chain.
// NOTE(review): `DL` has no visible declaration in this listing -- confirm
// against the full source.
7054 SDValue
PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op
,
7055 SelectionDAG
&DAG
) const {
7057 return DAG
.getNode(PPCISD::EH_SJLJ_SETJMP
, DL
,
7058 DAG
.getVTList(MVT::i32
, MVT::Other
),
7059 Op
.getOperand(0), Op
.getOperand(1));
/// Re-emits Op as a PPCISD::EH_SJLJ_LONGJMP node that takes Op's operands 0
/// and 1 and produces only a chain (MVT::Other).
// NOTE(review): `DL` has no visible declaration in this listing -- confirm
// against the full source.
7062 SDValue
PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op
,
7063 SelectionDAG
&DAG
) const {
7065 return DAG
.getNode(PPCISD::EH_SJLJ_LONGJMP
, DL
, MVT::Other
,
7066 Op
.getOperand(0), Op
.getOperand(1));
/// Custom lowering for loads.
///
/// Vector-typed loads are forwarded to LowerVectorLoad(). Any other load must
/// produce i1 (asserted): it is replaced by an i8 extending load to the
/// pointer type followed by a TRUNCATE to i1. The returned merge node carries
/// the truncated value and result 1 of the new load.
// NOTE(review): the declarations of `dl` and `NewLD` are not visible in this
// listing -- confirm against the full source.
7069 SDValue
PPCTargetLowering::LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
7070 if (Op
.getValueType().isVector())
7071 return LowerVectorLoad(Op
, DAG
);
7073 assert(Op
.getValueType() == MVT::i1
&&
7074 "Custom lowering only for i1 loads");
7076 // First, load 8 bits extended to the pointer type, then truncate to 1 bit.
7079 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op
);
7081 SDValue Chain
= LD
->getChain();
7082 SDValue BasePtr
= LD
->getBasePtr();
7083 MachineMemOperand
*MMO
= LD
->getMemOperand();
7086 DAG
.getExtLoad(ISD::EXTLOAD
, dl
, getPointerTy(DAG
.getDataLayout()), Chain
,
7087 BasePtr
, MVT::i8
, MMO
);
7088 SDValue Result
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, NewLD
);
// Hand back both the i1 value and result 1 of the replacement load.
7090 SDValue Ops
[] = { Result
, SDValue(NewLD
.getNode(), 1) };
7091 return DAG
.getMergeValues(Ops
, dl
);
/// Custom lowering for stores.
///
/// Stores whose operand 1 is vector-typed are forwarded to
/// LowerVectorStore(). Any other store must have an i1 operand 1 (asserted):
/// the value is zero-extended to the pointer type and written with a
/// truncating store of memory type i8, reusing the original memory operand.
// NOTE(review): the declaration of `dl` and the last argument of the
// ZERO_EXTEND node are not visible in this listing -- confirm against the full
// source.
7094 SDValue
PPCTargetLowering::LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const {
7095 if (Op
.getOperand(1).getValueType().isVector())
7096 return LowerVectorStore(Op
, DAG
);
7098 assert(Op
.getOperand(1).getValueType() == MVT::i1
&&
7099 "Custom lowering only for i1 stores");
7101 // First, zero extend to the pointer type, then use a truncating store to
// 8 bits.
7104 StoreSDNode
*ST
= cast
<StoreSDNode
>(Op
);
7106 SDValue Chain
= ST
->getChain();
7107 SDValue BasePtr
= ST
->getBasePtr();
7108 SDValue Value
= ST
->getValue();
7109 MachineMemOperand
*MMO
= ST
->getMemOperand();
7111 Value
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, getPointerTy(DAG
.getDataLayout()),
7113 return DAG
.getTruncStore(Chain
, dl
, Value
, BasePtr
, MVT::i8
, MMO
);
7116 // FIXME: Remove this once the ANDI glue bug is fixed:
/// Custom lowering of a truncate whose result type is i1 (asserted); the
/// result is a PPCISD::ANDIo_1_GT_BIT node of type i1.
// NOTE(review): the declaration of `DL` and the operand passed to the node
// are not visible in this listing -- confirm against the full source.
7117 SDValue
PPCTargetLowering::LowerTRUNCATE(SDValue Op
, SelectionDAG
&DAG
) const {
7118 assert(Op
.getValueType() == MVT::i1
&&
7119 "Custom lowering only for i1 results");
7122 return DAG
.getNode(PPCISD::ANDIo_1_GT_BIT
, DL
, MVT::i1
,
/// Lowers a vector truncate to a single vector shuffle.
///
/// The source (operand 0, at most 128 bits, asserted) is widened with
/// widenVec() unless it is already 128 bits, bitcast to a 128-bit vector of
/// the target element type, and shuffled so that the first TrgNumElts result
/// lanes pick the lanes to keep. The result has the widened type WideVT, not
/// Op's own type.
// NOTE(review): the declaration of `DL` and the `else` between the two
// lane-selection loops are not visible in this listing -- confirm against the
// full source.
7126 SDValue
PPCTargetLowering::LowerTRUNCATEVector(SDValue Op
,
7127 SelectionDAG
&DAG
) const {
7129 // Implements a vector truncate that fits in a vector register as a shuffle.
7130 // We want to legalize vector truncates down to where the source fits in
7131 // a vector register (and target is therefore smaller than vector register
7132 // size). At that point legalization will try to custom lower the sub-legal
7133 // result and get here - where we can contain the truncate as a single target
7136 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7137 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7139 // We will implement it for big-endian ordering as this (where x denotes
7141 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7142 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7144 // The same operation in little-endian ordering will be:
7145 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7146 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7148 assert(Op
.getValueType().isVector() && "Vector type expected.");
7151 SDValue N1
= Op
.getOperand(0);
7152 unsigned SrcSize
= N1
.getValueType().getSizeInBits();
7153 assert(SrcSize
<= 128 && "Source must fit in an Altivec/VSX vector");
7154 SDValue WideSrc
= SrcSize
== 128 ? N1
: widenVec(DAG
, N1
, DL
);
7156 EVT TrgVT
= Op
.getValueType();
7157 unsigned TrgNumElts
= TrgVT
.getVectorNumElements();
7158 EVT EltVT
= TrgVT
.getVectorElementType();
7159 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
7160 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
7162 // First list the elements we want to keep.
// SizeMult is the number of target-sized lanes per source element.
7163 unsigned SizeMult
= SrcSize
/ TrgVT
.getSizeInBits();
7164 SmallVector
<int, 16> ShuffV
;
// Little endian keeps the first lane of each source element (i * SizeMult);
// the other loop keeps the last lane (i * SizeMult - 1).
7165 if (Subtarget
.isLittleEndian())
7166 for (unsigned i
= 0; i
< TrgNumElts
; ++i
)
7167 ShuffV
.push_back(i
* SizeMult
);
7169 for (unsigned i
= 1; i
<= TrgNumElts
; ++i
)
7170 ShuffV
.push_back(i
* SizeMult
- 1);
7172 // Populate the remaining elements with undefs.
// Index WideNumElts + 1 selects a lane of the second shuffle operand, which
// is an UNDEF vector below.
7173 for (unsigned i
= TrgNumElts
; i
< WideNumElts
; ++i
)
7174 // ShuffV.push_back(i + WideNumElts);
7175 ShuffV
.push_back(WideNumElts
+ 1);
7177 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, DL
, WideVT
, WideSrc
);
7178 return DAG
.getVectorShuffle(WideVT
, DL
, Conv
, DAG
.getUNDEF(WideVT
), ShuffV
);
7181 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
///
/// Operands of Op as read below: 0/1 are the compared values (LHS/RHS), 2/3
/// are the selected values (TV/FV) and 4 is the condition code.
///
/// Two families of FSEL sequences are visible:
///  * when RHS is a floating point zero, FSEL tests LHS (or FNEG of LHS)
///    directly;
///  * otherwise FSEL tests the result of an FSUB of the two compared values
///    (LHS - RHS or RHS - LHS), with the no-infs / no-NaNs flags set on the
///    FSUB.
/// Any f32 value used as the FSEL test operand is first extended to f64.
// NOTE(review): this listing has lost the statements controlled by the guard
// conditions, the `switch (CC)` headers and every `case` label, plus the
// declarations of `dl`, `Flags`, `Sel1` and `Cmp`. Which condition code maps
// to which FSEL sequence cannot be read off the visible lines -- confirm
// against the full source before relying on it.
7183 SDValue
PPCTargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const {
7184 // Not FP? Not a fsel.
7185 if (!Op
.getOperand(0).getValueType().isFloatingPoint() ||
7186 !Op
.getOperand(2).getValueType().isFloatingPoint())
7189 // We might be able to do better than this under some circumstances, but in
7190 // general, fsel-based lowering of select is a finite-math-only optimization.
7191 // For more information, see section F.3 of the 2.06 ISA specification.
7192 if (!DAG
.getTarget().Options
.NoInfsFPMath
||
7193 !DAG
.getTarget().Options
.NoNaNsFPMath
)
7195 // TODO: Propagate flags from the select rather than global settings.
7197 Flags
.setNoInfs(true);
7198 Flags
.setNoNaNs(true);
7200 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
7202 EVT ResVT
= Op
.getValueType();
7203 EVT CmpVT
= Op
.getOperand(0).getValueType();
7204 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
7205 SDValue TV
= Op
.getOperand(2), FV
= Op
.getOperand(3);
7208 // If the RHS of the comparison is a 0.0, we don't need to do the
7209 // subtraction at all.
7211 if (isFloatingPointZero(RHS
))
7213 default: break; // SETUO etc aren't handled by fsel.
7218 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7219 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7220 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
7221 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7222 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
7223 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7224 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), Sel1
, FV
);
7227 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
7231 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7232 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7233 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
7236 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
7240 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7241 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7242 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7243 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), TV
, FV
);
7248 default: break; // SETUO etc aren't handled by fsel.
7253 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7254 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7255 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7256 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
7257 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7258 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
7259 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7260 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, Cmp
), Sel1
, FV
);
7263 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7264 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7265 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7266 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
7269 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7270 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7271 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7272 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
7275 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
7276 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7277 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7278 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
7281 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
7282 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7283 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7284 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
/// Performs the "through memory" part of an FP-to-integer conversion and
/// describes the resulting stack slot in RLI.
///
/// Visible steps: extend an f32 source to f64, convert it with an FCTI* node
/// selected by Op's result type and signedness, store the converted value to
/// a fresh stack temporary (with PPCISD::STFIWX as a 4-byte store, or a
/// regular store), and adjust the slot pointer when an i32 result is read out
/// of a slot that was not written as i32.
// NOTE(review): this listing has lost the `case` labels, the declarations of
// `Tmp` and `Chain`, the `if (i32Stack)` / `else` around the two stores and
// the final assignments into RLI -- confirm against the full source.
7289 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op
, ReuseLoadInfo
&RLI
,
7291 const SDLoc
&dl
) const {
7292 assert(Op
.getOperand(0).getValueType().isFloatingPoint());
7293 SDValue Src
= Op
.getOperand(0);
7294 if (Src
.getValueType() == MVT::f32
)
7295 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
7298 switch (Op
.getSimpleValueType().SimpleTy
) {
7299 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7302 Op
.getOpcode() == ISD::FP_TO_SINT
7304 : (Subtarget
.hasFPCVT() ? PPCISD::FCTIWUZ
: PPCISD::FCTIDZ
),
7308 assert((Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT()) &&
7309 "i64 FP_TO_UINT is supported only with FPCVT");
7310 Tmp
= DAG
.getNode(Op
.getOpcode()==ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
7316 // Convert the FP value to an int value through memory.
// i32Stack: the slot can be written as a 4-byte integer, which requires
// STFIWX and either a signed conversion or FPCVT.
7317 bool i32Stack
= Op
.getValueType() == MVT::i32
&& Subtarget
.hasSTFIWX() &&
7318 (Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT());
7319 SDValue FIPtr
= DAG
.CreateStackTemporary(i32Stack
? MVT::i32
: MVT::f64
);
7320 int FI
= cast
<FrameIndexSDNode
>(FIPtr
)->getIndex();
7321 MachinePointerInfo MPI
=
7322 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FI
);
7324 // Emit a store to the stack slot.
7327 MachineFunction
&MF
= DAG
.getMachineFunction();
7328 MachineMemOperand
*MMO
=
7329 MF
.getMachineMemOperand(MPI
, MachineMemOperand::MOStore
, 4, 4);
7330 SDValue Ops
[] = { DAG
.getEntryNode(), Tmp
, FIPtr
};
7331 Chain
= DAG
.getMemIntrinsicNode(PPCISD::STFIWX
, dl
,
7332 DAG
.getVTList(MVT::Other
), Ops
, MVT::i32
, MMO
);
7334 Chain
= DAG
.getStore(DAG
.getEntryNode(), dl
, Tmp
, FIPtr
, MPI
);
7336 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7337 // add in a bias on big endian.
// NOTE(review): FIPtr is advanced by 4 unconditionally inside this branch,
// while the MPI offset is 0 on little endian and 4 on big endian -- confirm
// the little-endian mismatch is intended (or unreachable).
7338 if (Op
.getValueType() == MVT::i32
&& !i32Stack
) {
7339 FIPtr
= DAG
.getNode(ISD::ADD
, dl
, FIPtr
.getValueType(), FIPtr
,
7340 DAG
.getConstant(4, dl
, FIPtr
.getValueType()));
7341 MPI
= MPI
.getWithOffset(Subtarget
.isLittleEndian() ? 0 : 4);
7349 /// Custom lowers floating point to integer conversions to use
7350 /// the direct move instructions available in ISA 2.07 to avoid the
7351 /// need for load/store combinations.
///
/// Visible steps: an f32 source is extended to f64, converted with an FCTI*
/// node chosen by Op's result type and signedness, and the converted value is
/// moved out with PPCISD::MFVSR as i32 or i64.
// NOTE(review): this listing has lost the `case` labels, the declaration of
// `Tmp`, the DAG parameter line and the final return -- confirm against the
// full source.
7352 SDValue
PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op
,
7354 const SDLoc
&dl
) const {
7355 assert(Op
.getOperand(0).getValueType().isFloatingPoint());
7356 SDValue Src
= Op
.getOperand(0);
7358 if (Src
.getValueType() == MVT::f32
)
7359 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
7362 switch (Op
.getSimpleValueType().SimpleTy
) {
7363 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7366 Op
.getOpcode() == ISD::FP_TO_SINT
7368 : (Subtarget
.hasFPCVT() ? PPCISD::FCTIWUZ
: PPCISD::FCTIDZ
),
7370 Tmp
= DAG
.getNode(PPCISD::MFVSR
, dl
, MVT::i32
, Tmp
);
7373 assert((Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT()) &&
7374 "i64 FP_TO_UINT is supported only with FPCVT");
7375 Tmp
= DAG
.getNode(Op
.getOpcode()==ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
7378 Tmp
= DAG
.getNode(PPCISD::MFVSR
, dl
, MVT::i64
, Tmp
);
/// Entry point for custom lowering of FP_TO_SINT / FP_TO_UINT.
///
/// Visible cases, in order:
///  * f128 sources when EnableQuadPrecision is set (see the comment below);
///  * ppcf128 -> i32: signed conversions add the two f64 halves with
///    PPCISD::FADDRTZ and convert the sum; unsigned conversions build both
///    (int)(X - 2^31) + 0x80000000 and (int)X and pick one with a select_cc
///    comparing X against 2^31;
///  * subtargets with direct moves on PPC64 use LowerFP_TO_INTDirectMove();
///  * everything else goes through LowerFP_TO_INTForReuse() and loads the
///    result back from the slot described by the ReuseLoadInfo.
// NOTE(review): this listing has lost several statements (the return for the
// f128 case, the operand of the `False` node, the condition code passed to
// getSelectCC, the fall-through for other ppcf128 results and the declaration
// of `RLI`) -- confirm against the full source.
7384 SDValue
PPCTargetLowering::LowerFP_TO_INT(SDValue Op
, SelectionDAG
&DAG
,
7385 const SDLoc
&dl
) const {
7387 // FP to INT conversions are legal for f128.
7388 if (EnableQuadPrecision
&& (Op
->getOperand(0).getValueType() == MVT::f128
))
7391 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7392 // PPC (the libcall is not available).
7393 if (Op
.getOperand(0).getValueType() == MVT::ppcf128
) {
7394 if (Op
.getValueType() == MVT::i32
) {
7395 if (Op
.getOpcode() == ISD::FP_TO_SINT
) {
7396 SDValue Lo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
,
7397 MVT::f64
, Op
.getOperand(0),
7398 DAG
.getIntPtrConstant(0, dl
));
7399 SDValue Hi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
,
7400 MVT::f64
, Op
.getOperand(0),
7401 DAG
.getIntPtrConstant(1, dl
));
7403 // Add the two halves of the long double in round-to-zero mode.
7404 SDValue Res
= DAG
.getNode(PPCISD::FADDRTZ
, dl
, MVT::f64
, Lo
, Hi
);
7406 // Now use a smaller FP_TO_SINT.
7407 return DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, Res
);
7409 if (Op
.getOpcode() == ISD::FP_TO_UINT
) {
// 0x41e0000000000000 is the IEEE double bit pattern of 2^31; the second word
// (0) is the other half of the ppcf128 constant.
7410 const uint64_t TwoE31
[] = {0x41e0000000000000LL
, 0};
7411 APFloat APF
= APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31
));
7412 SDValue Tmp
= DAG
.getConstantFP(APF
, dl
, MVT::ppcf128
);
7413 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7414 // FIXME: generated code sucks.
7415 // TODO: Are there fast-math-flags to propagate to this FSUB?
7416 SDValue True
= DAG
.getNode(ISD::FSUB
, dl
, MVT::ppcf128
,
7417 Op
.getOperand(0), Tmp
);
7418 True
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, True
);
7419 True
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, True
,
7420 DAG
.getConstant(0x80000000, dl
, MVT::i32
));
7421 SDValue False
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
,
7423 return DAG
.getSelectCC(dl
, Op
.getOperand(0), Tmp
, True
, False
,
7431 if (Subtarget
.hasDirectMove() && Subtarget
.isPPC64())
7432 return LowerFP_TO_INTDirectMove(Op
, DAG
, dl
);
7435 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
// Load the converted integer back from the slot described by RLI.
7437 return DAG
.getLoad(Op
.getValueType(), dl
, RLI
.Chain
, RLI
.Ptr
, RLI
.MPI
,
7438 RLI
.Alignment
, RLI
.MMOFlags(), RLI
.AAInfo
, RLI
.Ranges
);
7441 // We're trying to insert a regular store, S, and then a load, L. If the
7442 // incoming value, O, is a load, we might just be able to have our load use the
7443 // address used by O. However, we don't know if anything else will store to
7444 // that address before we can load from it. To prevent this situation, we need
7445 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7446 // the same chain operand as O, we create a token factor from the chain results
7447 // of O and L, and we replace all uses of O's chain result with that token
7448 // factor (see spliceIntoChain below for this last part).
//
// Fills RLI with what is needed to issue such a load. Two sources are visible:
//  * Op is an FP_TO_SINT / FP_TO_UINT that is legal or custom for its operand
//    type and ET is NON_EXTLOAD: the conversion is materialized via
//    LowerFP_TO_INTForReuse(), which populates RLI;
//  * Op is a load whose extension type matches ET, which is neither volatile
//    nor non-temporal, and whose memory type is MemVT: RLI takes the load's
//    pointer (adjusted for pre-increment addressing), chain, pointer info,
//    dereferenceable / invariant bits, alignment, AA info and ranges.
// RLI.ResChain is the load's chain result: result 2 for indexed loads,
// result 1 otherwise.
// NOTE(review): the return statements, the declaration of `dl`, two parameter
// lines and the second operand of the pre-increment ADD are not visible in
// this listing -- confirm against the full source.
7449 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op
, EVT MemVT
,
7452 ISD::LoadExtType ET
) const {
7454 if (ET
== ISD::NON_EXTLOAD
&&
7455 (Op
.getOpcode() == ISD::FP_TO_UINT
||
7456 Op
.getOpcode() == ISD::FP_TO_SINT
) &&
7457 isOperationLegalOrCustom(Op
.getOpcode(),
7458 Op
.getOperand(0).getValueType())) {
7460 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
7464 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Op
);
7465 if (!LD
|| LD
->getExtensionType() != ET
|| LD
->isVolatile() ||
7466 LD
->isNonTemporal())
7468 if (LD
->getMemoryVT() != MemVT
)
7471 RLI
.Ptr
= LD
->getBasePtr();
7472 if (LD
->isIndexed() && !LD
->getOffset().isUndef()) {
7473 assert(LD
->getAddressingMode() == ISD::PRE_INC
&&
7474 "Non-pre-inc AM on PPC?");
7475 RLI
.Ptr
= DAG
.getNode(ISD::ADD
, dl
, RLI
.Ptr
.getValueType(), RLI
.Ptr
,
7479 RLI
.Chain
= LD
->getChain();
7480 RLI
.MPI
= LD
->getPointerInfo();
7481 RLI
.IsDereferenceable
= LD
->isDereferenceable();
7482 RLI
.IsInvariant
= LD
->isInvariant();
7483 RLI
.Alignment
= LD
->getAlignment();
7484 RLI
.AAInfo
= LD
->getAAInfo();
7485 RLI
.Ranges
= LD
->getRanges();
7487 RLI
.ResChain
= SDValue(LD
, LD
->isIndexed() ? 2 : 1);
7491 // Given the head of the old chain, ResChain, insert a token factor containing
7492 // it and NewResChain, and make users of ResChain now be users of that token
7494 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
//
// The token factor is first created with (NewResChain, UNDEF) operands, then
// every use of ResChain is redirected to it, and only afterwards are its
// operands updated to (ResChain, NewResChain). Presumably the UNDEF
// placeholder keeps the token factor itself from being a user of ResChain
// while the uses are being replaced -- confirm.
// NOTE(review): lines between the signature and the SDLoc declaration are not
// visible in this listing -- confirm against the full source.
7495 void PPCTargetLowering::spliceIntoChain(SDValue ResChain
,
7496 SDValue NewResChain
,
7497 SelectionDAG
&DAG
) const {
7501 SDLoc
dl(NewResChain
);
7503 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
7504 NewResChain
, DAG
.getUNDEF(MVT::Other
));
7505 assert(TF
.getNode() != NewResChain
.getNode() &&
7506 "A new TF really is required here");
7508 DAG
.ReplaceAllUsesOfValueWith(ResChain
, TF
);
7509 DAG
.UpdateNodeOperands(TF
.getNode(), ResChain
, NewResChain
);
7512 /// Analyze profitability of direct move
7513 /// prefer float load to int load plus direct move
7514 /// when there is no integer use of int load
///
/// The visible checks inspect the node that produces Op's operand 0:
///  * whether it is an ISD::LOAD at all;
///  * whether the subtarget lacks P9 vector support and the load is at most 2
///    bytes wide;
///  * for each use of the loaded value (result 0), whether the user is
///    something other than SINT_TO_FP / UINT_TO_FP.
// NOTE(review): every return statement and the loop increment/`continue` are
// missing from this listing, so the result of each check cannot be read off
// the visible lines -- confirm against the full source.
7515 bool PPCTargetLowering::directMoveIsProfitable(const SDValue
&Op
) const {
7516 SDNode
*Origin
= Op
.getOperand(0).getNode();
7517 if (Origin
->getOpcode() != ISD::LOAD
)
7520 // If there is no LXSIBZX/LXSIHZX, like Power8,
7521 // prefer direct move if the memory size is 1 or 2 bytes.
7522 MachineMemOperand
*MMO
= cast
<LoadSDNode
>(Origin
)->getMemOperand();
7523 if (!Subtarget
.hasP9Vector() && MMO
->getSize() <= 2)
7526 for (SDNode::use_iterator UI
= Origin
->use_begin(),
7527 UE
= Origin
->use_end();
7530 // Only look at the users of the loaded value.
7531 if (UI
.getUse().get().getResNo() != 0)
7534 if (UI
->getOpcode() != ISD::SINT_TO_FP
&&
7535 UI
->getOpcode() != ISD::UINT_TO_FP
)
7542 /// Custom lowers integer to floating point conversions to use
7543 /// the direct move instructions available in ISA 2.07 to avoid the
7544 /// need for load/store combinations.
///
/// Requires an f32 or f64 result and FPCVT (both asserted). The integer is
/// moved into a floating point register with MTVSRA or MTVSRZ and then
/// converted with one of FCFID / FCFIDS / FCFIDU / FCFIDUS, chosen by
/// signedness and result precision.
// NOTE(review): the branch on `WordInt`, the declaration of `FP`, the DAG
// parameter line and the final return are not visible in this listing --
// confirm against the full source.
7545 SDValue
PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op
,
7547 const SDLoc
&dl
) const {
7548 assert((Op
.getValueType() == MVT::f32
||
7549 Op
.getValueType() == MVT::f64
) &&
7550 "Invalid floating point type as target of conversion");
7551 assert(Subtarget
.hasFPCVT() &&
7552 "Int to FP conversions with direct moves require FPCVT");
7554 SDValue Src
= Op
.getOperand(0);
7555 bool SinglePrec
= Op
.getValueType() == MVT::f32
;
7556 bool WordInt
= Src
.getSimpleValueType().SimpleTy
== MVT::i32
;
7557 bool Signed
= Op
.getOpcode() == ISD::SINT_TO_FP
;
// Conversion opcode: signed -> FCFID(S), unsigned -> FCFIDU(S); the trailing
// S variants are used for single-precision results.
7558 unsigned ConvOp
= Signed
? (SinglePrec
? PPCISD::FCFIDS
: PPCISD::FCFID
) :
7559 (SinglePrec
? PPCISD::FCFIDUS
: PPCISD::FCFIDU
);
7562 FP
= DAG
.getNode(Signed
? PPCISD::MTVSRA
: PPCISD::MTVSRZ
,
7564 FP
= DAG
.getNode(ConvOp
, dl
, SinglePrec
? MVT::f32
: MVT::f64
, FP
);
7567 FP
= DAG
.getNode(PPCISD::MTVSRA
, dl
, MVT::f64
, Src
);
7568 FP
= DAG
.getNode(ConvOp
, dl
, SinglePrec
? MVT::f32
: MVT::f64
, FP
);
/// Widens Vec, a vector narrower than 128 bits (asserted), to a 128-bit vector
/// with the same element type by emitting a CONCAT_VECTORS of NumConcat
/// operands, where NumConcat = (128 / element bits) / (elements in Vec).
// NOTE(review): the statements that fill Ops (including the body of the loop
// over i >= 1, presumably with UndefVec) are not visible in this listing --
// confirm against the full source.
7574 static SDValue
widenVec(SelectionDAG
&DAG
, SDValue Vec
, const SDLoc
&dl
) {
7576 EVT VecVT
= Vec
.getValueType();
7577 assert(VecVT
.isVector() && "Expected a vector type.");
7578 assert(VecVT
.getSizeInBits() < 128 && "Vector is already full width.");
7580 EVT EltVT
= VecVT
.getVectorElementType();
7581 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
7582 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
7584 unsigned NumConcat
= WideNumElts
/ VecVT
.getVectorNumElements();
7585 SmallVector
<SDValue
, 16> Ops(NumConcat
);
7587 SDValue UndefVec
= DAG
.getUNDEF(VecVT
);
7588 for (unsigned i
= 1; i
< NumConcat
; ++i
)
7591 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, WideVT
, Ops
);
/// Lowers a vector SINT_TO_FP / UINT_TO_FP whose result is v2f64 or v4f32
/// (both asserted).
///
/// The source is widened with widenVec(), its elements are shuffled into the
/// lanes that line up with the wider intermediate integer type (v4i32 or
/// v2i64), the lanes are extended, and the original conversion opcode is
/// applied to the extended vector.
///
/// The second shuffle operand is UNDEF for signed conversions and a zero
/// vector for unsigned ones, so unsigned inputs are zero-filled by the shuffle
/// itself and then only bitcast.
// NOTE(review): the `else` branches and the declarations of `ExtendOp` and
// `Extend` are not visible in this listing -- confirm against the full source.
7594 SDValue
PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op
, SelectionDAG
&DAG
,
7595 const SDLoc
&dl
) const {
7597 unsigned Opc
= Op
.getOpcode();
7598 assert((Opc
== ISD::UINT_TO_FP
|| Opc
== ISD::SINT_TO_FP
) &&
7599 "Unexpected conversion type");
7600 assert((Op
.getValueType() == MVT::v2f64
|| Op
.getValueType() == MVT::v4f32
) &&
7601 "Supports conversions to v2f64/v4f32 only.");
7603 bool SignedConv
= Opc
== ISD::SINT_TO_FP
;
7604 bool FourEltRes
= Op
.getValueType() == MVT::v4f32
;
7606 SDValue Wide
= widenVec(DAG
, Op
.getOperand(0), dl
);
7607 EVT WideVT
= Wide
.getValueType();
7608 unsigned WideNumElts
= WideVT
.getVectorNumElements();
7609 MVT IntermediateVT
= FourEltRes
? MVT::v4i32
: MVT::v2i64
;
// Start with every lane selecting from the second shuffle operand
// (indices >= WideNumElts); the lanes that carry data are overwritten below.
7611 SmallVector
<int, 16> ShuffV
;
7612 for (unsigned i
= 0; i
< WideNumElts
; ++i
)
7613 ShuffV
.push_back(i
+ WideNumElts
);
// Stride = number of narrow lanes per intermediate element; SaveElts = number
// of source elements that are actually converted.
7615 int Stride
= FourEltRes
? WideNumElts
/ 4 : WideNumElts
/ 2;
7616 int SaveElts
= FourEltRes
? 4 : 2;
7617 if (Subtarget
.isLittleEndian())
7618 for (int i
= 0; i
< SaveElts
; i
++)
7619 ShuffV
[i
* Stride
] = i
;
7621 for (int i
= 1; i
<= SaveElts
; i
++)
7622 ShuffV
[i
* Stride
- 1] = i
- 1;
7624 SDValue ShuffleSrc2
=
7625 SignedConv
? DAG
.getUNDEF(WideVT
) : DAG
.getConstant(0, dl
, WideVT
);
7626 SDValue Arrange
= DAG
.getVectorShuffle(WideVT
, dl
, Wide
, ShuffleSrc2
, ShuffV
);
7628 SignedConv
? (unsigned)PPCISD::SExtVElems
: (unsigned)ISD::BITCAST
;
// Signed conversions on subtargets without P9 Altivec sign-extend in-register
// from the original source vector type.
7631 if (!Subtarget
.hasP9Altivec() && SignedConv
) {
7632 Arrange
= DAG
.getBitcast(IntermediateVT
, Arrange
);
7633 Extend
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, IntermediateVT
, Arrange
,
7634 DAG
.getValueType(Op
.getOperand(0).getValueType()));
7636 Extend
= DAG
.getNode(ExtendOp
, dl
, IntermediateVT
, Arrange
);
7638 return DAG
.getNode(Opc
, dl
, Op
.getValueType(), Extend
);
// Custom lowering for integer-to-floating-point conversions (the opcode is
// tested against ISD::SINT_TO_FP / ISD::UINT_TO_FP below).
// Cases visible here, in order:
//   - vector FP results whose opcode is custom for the input type are handed
//     to LowerINT_TO_FPVector;
//   - f128 results, QPX v4i1 inputs and i1 inputs are special-cased;
//   - subtargets with direct moves, PPC64 and FPCVT use LowerINT_TO_FPDirectMove;
//   - remaining i64 and i32 inputs are moved into an f64 (reused load address,
//     stack slot, or bitcast) and converted with the node selected in FCFOp.
7641 SDValue
PPCTargetLowering::LowerINT_TO_FP(SDValue Op
,
7642 SelectionDAG
&DAG
) const {
7645 EVT InVT
= Op
.getOperand(0).getValueType();
7646 EVT OutVT
= Op
.getValueType();
7647 if (OutVT
.isVector() && OutVT
.isFloatingPoint() &&
7648 isOperationCustom(Op
.getOpcode(), InVT
))
7649 return LowerINT_TO_FPVector(Op
, DAG
, dl
);
7651 // Conversions to f128 are legal.
7652 if (EnableQuadPrecision
&& (Op
.getValueType() == MVT::f128
))
// QPX: a v4i1 input is converted with QBFLT followed by an FMA with 0.5, and
// an FP_ROUND is added when the requested result type is not v4f64.
7655 if (Subtarget
.hasQPX() && Op
.getOperand(0).getValueType() == MVT::v4i1
) {
7656 if (Op
.getValueType() != MVT::v4f32
&& Op
.getValueType() != MVT::v4f64
)
7659 SDValue Value
= Op
.getOperand(0);
7660 // The values are now known to be -1 (false) or 1 (true). To convert this
7661 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7662 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7663 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
7665 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
7667 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
7669 if (Op
.getValueType() != MVT::v4f64
)
7670 Value
= DAG
.getNode(ISD::FP_ROUND
, dl
,
7671 Op
.getValueType(), Value
,
7672 DAG
.getIntPtrConstant(1, dl
));
7676 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7677 if (Op
.getValueType() != MVT::f32
&& Op
.getValueType() != MVT::f64
)
// An i1 input becomes a SELECT between the FP constants 1.0 and 0.0.
7680 if (Op
.getOperand(0).getValueType() == MVT::i1
)
7681 return DAG
.getNode(ISD::SELECT
, dl
, Op
.getValueType(), Op
.getOperand(0),
7682 DAG
.getConstantFP(1.0, dl
, Op
.getValueType()),
7683 DAG
.getConstantFP(0.0, dl
, Op
.getValueType()));
7685 // If we have direct moves, we can do all the conversion, skip the store/load
7686 // however, without FPCVT we can't do most conversions.
7687 if (Subtarget
.hasDirectMove() && directMoveIsProfitable(Op
) &&
7688 Subtarget
.isPPC64() && Subtarget
.hasFPCVT())
7689 return LowerINT_TO_FPDirectMove(Op
, DAG
, dl
);
7691 assert((Op
.getOpcode() == ISD::SINT_TO_FP
|| Subtarget
.hasFPCVT()) &&
7692 "UINT_TO_FP is supported only with FPCVT");
7694 // If we have FCFIDS, then use it when converting to single-precision.
7695 // Otherwise, convert to double-precision and then round.
7696 unsigned FCFOp
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
7697 ? (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDUS
7699 : (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDU
7701 MVT FCFTy
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
7705 if (Op
.getOperand(0).getValueType() == MVT::i64
) {
7706 SDValue SINT
= Op
.getOperand(0);
7707 // When converting to single-precision, we actually need to convert
7708 // to double-precision first and then round to single-precision.
7709 // To avoid double-rounding effects during that operation, we have
7710 // to prepare the input operand. Bits that might be truncated when
7711 // converting to double-precision are replaced by a bit that won't
7712 // be lost at this stage, but is below the single-precision rounding
7715 // However, if -enable-unsafe-fp-math is in effect, accept double
7716 // rounding to avoid the extra overhead.
7717 if (Op
.getValueType() == MVT::f32
&&
7718 !Subtarget
.hasFPCVT() &&
7719 !DAG
.getTarget().Options
.UnsafeFPMath
) {
7721 // Twiddle input to make sure the low 11 bits are zero. (If this
7722 // is the case, we are guaranteed the value will fit into the 53 bit
7723 // mantissa of an IEEE double-precision value without rounding.)
7724 // If any of those low 11 bits were not zero originally, make sure
7725 // bit 12 (value 2048) is set instead, so that the final rounding
7726 // to single-precision gets the correct result.
// (SINT & 2047) + 2047 reaches the 2048 bit exactly when some low bit is
// set; OR-ing SINT back in and masking with -2048 then clears the low 11 bits.
7727 SDValue Round
= DAG
.getNode(ISD::AND
, dl
, MVT::i64
,
7728 SINT
, DAG
.getConstant(2047, dl
, MVT::i64
));
7729 Round
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
,
7730 Round
, DAG
.getConstant(2047, dl
, MVT::i64
));
7731 Round
= DAG
.getNode(ISD::OR
, dl
, MVT::i64
, Round
, SINT
);
7732 Round
= DAG
.getNode(ISD::AND
, dl
, MVT::i64
,
7733 Round
, DAG
.getConstant(-2048, dl
, MVT::i64
));
7735 // However, we cannot use that value unconditionally: if the magnitude
7736 // of the input value is small, the bit-twiddling we did above might
7737 // end up visibly changing the output. Fortunately, in that case, we
7738 // don't need to twiddle bits since the original input will convert
7739 // exactly to double-precision floating-point already. Therefore,
7740 // construct a conditional to use the original value if the top 11
7741 // bits are all sign-bit copies, and use the rounded value computed
// (SINT >>s 53) + 1 is 0 or 1 exactly when the top 11 bits are sign-bit
// copies, so the unsigned "> 1" test selects Round only for the other inputs.
7743 SDValue Cond
= DAG
.getNode(ISD::SRA
, dl
, MVT::i64
,
7744 SINT
, DAG
.getConstant(53, dl
, MVT::i32
));
7745 Cond
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
,
7746 Cond
, DAG
.getConstant(1, dl
, MVT::i64
));
7747 Cond
= DAG
.getSetCC(dl
, MVT::i32
,
7748 Cond
, DAG
.getConstant(1, dl
, MVT::i64
), ISD::SETUGT
);
7750 SINT
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i64
, Cond
, Round
, SINT
);
7756 MachineFunction
&MF
= DAG
.getMachineFunction();
// Get the integer bits into an f64. First try to reuse the address of a load
// feeding the input: a full i64 load, then a sign-extending i32 load (LFIWAX),
// then a zero-extending i32 load (LFIWZX).
7757 if (canReuseLoadAddress(SINT
, MVT::i64
, RLI
, DAG
)) {
7758 Bits
= DAG
.getLoad(MVT::f64
, dl
, RLI
.Chain
, RLI
.Ptr
, RLI
.MPI
,
7759 RLI
.Alignment
, RLI
.MMOFlags(), RLI
.AAInfo
, RLI
.Ranges
);
7760 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
7761 } else if (Subtarget
.hasLFIWAX() &&
7762 canReuseLoadAddress(SINT
, MVT::i32
, RLI
, DAG
, ISD::SEXTLOAD
)) {
7763 MachineMemOperand
*MMO
=
7764 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7765 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7766 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7767 Bits
= DAG
.getMemIntrinsicNode(PPCISD::LFIWAX
, dl
,
7768 DAG
.getVTList(MVT::f64
, MVT::Other
),
7769 Ops
, MVT::i32
, MMO
);
7770 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
7771 } else if (Subtarget
.hasFPCVT() &&
7772 canReuseLoadAddress(SINT
, MVT::i32
, RLI
, DAG
, ISD::ZEXTLOAD
)) {
7773 MachineMemOperand
*MMO
=
7774 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7775 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7776 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7777 Bits
= DAG
.getMemIntrinsicNode(PPCISD::LFIWZX
, dl
,
7778 DAG
.getVTList(MVT::f64
, MVT::Other
),
7779 Ops
, MVT::i32
, MMO
);
7780 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
// Otherwise, if the input is a sign/zero extension of an i32, the narrow value
// is stored to a 4-byte stack slot and reloaded with LFIWAX/LFIWZX.
7781 } else if (((Subtarget
.hasLFIWAX() &&
7782 SINT
.getOpcode() == ISD::SIGN_EXTEND
) ||
7783 (Subtarget
.hasFPCVT() &&
7784 SINT
.getOpcode() == ISD::ZERO_EXTEND
)) &&
7785 SINT
.getOperand(0).getValueType() == MVT::i32
) {
7786 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7787 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7789 int FrameIdx
= MFI
.CreateStackObject(4, 4, false);
7790 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7793 DAG
.getStore(DAG
.getEntryNode(), dl
, SINT
.getOperand(0), FIdx
,
7794 MachinePointerInfo::getFixedStack(
7795 DAG
.getMachineFunction(), FrameIdx
));
7797 assert(cast
<StoreSDNode
>(Store
)->getMemoryVT() == MVT::i32
&&
7798 "Expected an i32 store");
7803 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
7806 MachineMemOperand
*MMO
=
7807 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7808 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7809 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7810 Bits
= DAG
.getMemIntrinsicNode(SINT
.getOpcode() == ISD::ZERO_EXTEND
?
7811 PPCISD::LFIWZX
: PPCISD::LFIWAX
,
7812 dl
, DAG
.getVTList(MVT::f64
, MVT::Other
),
7813 Ops
, MVT::i32
, MMO
);
// Remaining case: reinterpret the i64 value as an f64 with a BITCAST.
7815 Bits
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::f64
, SINT
);
7817 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Bits
);
// Without FPCVT an f32 result is obtained by rounding the converted value.
7819 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT())
7820 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
,
7821 MVT::f32
, FP
, DAG
.getIntPtrConstant(0, dl
));
7825 assert(Op
.getOperand(0).getValueType() == MVT::i32
&&
7826 "Unhandled INT_TO_FP type in custom expander!");
7827 // Since we only generate this in 64-bit mode, we can take advantage of
7828 // 64-bit registers. In particular, sign extend the input value into the
7829 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7830 // then lfd it and fcfid it.
7831 MachineFunction
&MF
= DAG
.getMachineFunction();
7832 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7833 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
// With LFIWAX or FPCVT the i32 is loaded into an FP register by LFIWAX/LFIWZX,
// reusing the source load address when possible and otherwise going through a
// 4-byte stack slot.
7836 if (Subtarget
.hasLFIWAX() || Subtarget
.hasFPCVT()) {
7839 if (!(ReusingLoad
= canReuseLoadAddress(Op
.getOperand(0), MVT::i32
, RLI
,
7841 int FrameIdx
= MFI
.CreateStackObject(4, 4, false);
7842 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7845 DAG
.getStore(DAG
.getEntryNode(), dl
, Op
.getOperand(0), FIdx
,
7846 MachinePointerInfo::getFixedStack(
7847 DAG
.getMachineFunction(), FrameIdx
));
7849 assert(cast
<StoreSDNode
>(Store
)->getMemoryVT() == MVT::i32
&&
7850 "Expected an i32 store");
7855 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
7859 MachineMemOperand
*MMO
=
7860 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7861 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7862 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7863 Ld
= DAG
.getMemIntrinsicNode(Op
.getOpcode() == ISD::UINT_TO_FP
?
7864 PPCISD::LFIWZX
: PPCISD::LFIWAX
,
7865 dl
, DAG
.getVTList(MVT::f64
, MVT::Other
),
7866 Ops
, MVT::i32
, MMO
);
7868 spliceIntoChain(RLI
.ResChain
, Ld
.getValue(1), DAG
);
7870 assert(Subtarget
.isPPC64() &&
7871 "i32->FP without LFIWAX supported only on PPC64");
7873 int FrameIdx
= MFI
.CreateStackObject(8, 8, false);
7874 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7876 SDValue Ext64
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i64
,
7879 // STD the extended value into the stack slot.
7880 SDValue Store
= DAG
.getStore(
7881 DAG
.getEntryNode(), dl
, Ext64
, FIdx
,
7882 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
));
7884 // Load the value as a double.
7886 MVT::f64
, dl
, Store
, FIdx
,
7887 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
));
7890 // FCFID it and return it.
7891 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Ld
);
7892 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT())
7893 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
, MVT::f32
, FP
,
7894 DAG
.getIntPtrConstant(0, dl
));
// Lowers the FLT_ROUNDS_ query. The FP control word is read with
// PPCISD::MFFS into an f64, stored to an 8-byte stack slot, and its i32 at
// byte offset 4 is reloaded. The two low bits are then remapped as
//   (CWD & 3) ^ (((CWD ^ 3) & 3) >> 1)
// which matches the formula quoted below, since (CWD ^ 3) & 3 == ~CWD & 3.
// The i32 result is finally truncated or zero-extended to the node's type.
// NOTE(review): the description below says "FPSR" while the formula says
// "FPSCR"; presumably both mean the same register - confirm and unify.
7898 SDValue
PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op
,
7899 SelectionDAG
&DAG
) const {
7902 The rounding mode is in bits 30:31 of FPSR, and has the following
7909 FLT_ROUNDS, on the other hand, expects the following:
7916 To perform the conversion, we do:
7917 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7920 MachineFunction
&MF
= DAG
.getMachineFunction();
7921 EVT VT
= Op
.getValueType();
7922 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7924 // Save FP Control Word to register
7926 MVT::f64
, // return register
7927 MVT::Glue
// unused in this context
7929 SDValue Chain
= DAG
.getNode(PPCISD::MFFS
, dl
, NodeTys
, None
);
7931 // Save FP register to stack slot
7932 int SSFI
= MF
.getFrameInfo().CreateStackObject(8, 8, false);
7933 SDValue StackSlot
= DAG
.getFrameIndex(SSFI
, PtrVT
);
7934 SDValue Store
= DAG
.getStore(DAG
.getEntryNode(), dl
, Chain
, StackSlot
,
7935 MachinePointerInfo());
7937 // Load FP Control Word from low 32 bits of stack slot.
// NOTE(review): byte offset 4 addresses the low word only in a big-endian
// layout; confirm how little-endian subtargets are meant to be handled here.
7938 SDValue Four
= DAG
.getConstant(4, dl
, PtrVT
);
7939 SDValue Addr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackSlot
, Four
);
7940 SDValue CWD
= DAG
.getLoad(MVT::i32
, dl
, Store
, Addr
, MachinePointerInfo());
7942 // Transform as necessary
7944 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
7945 CWD
, DAG
.getConstant(3, dl
, MVT::i32
));
7947 DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
7948 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
7949 DAG
.getNode(ISD::XOR
, dl
, MVT::i32
,
7950 CWD
, DAG
.getConstant(3, dl
, MVT::i32
)),
7951 DAG
.getConstant(3, dl
, MVT::i32
)),
7952 DAG
.getConstant(1, dl
, MVT::i32
));
7955 DAG
.getNode(ISD::XOR
, dl
, MVT::i32
, CWD1
, CWD2
);
// NOTE(review): RetVal is presumably the i32 XOR built just above. With a
// threshold of 16, a 16-bit VT would take the ZERO_EXTEND path; confirm that
// such a VT cannot reach this point or that the threshold is intended.
7957 return DAG
.getNode((VT
.getSizeInBits() < 16 ?
7958 ISD::TRUNCATE
: ISD::ZERO_EXTEND
), dl
, VT
, RetVal
);
// Lowers a double-width shift left whose operands are (Lo, Hi, Amt).
// With BitWidth the size of one part, the nodes built below compute
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
//   OutLo =  Lo << Amt
// using PPCISD::SHL / PPCISD::SRL, and return {OutLo, OutHi} merged.
7961 SDValue
PPCTargetLowering::LowerSHL_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
7962 EVT VT
= Op
.getValueType();
7963 unsigned BitWidth
= VT
.getSizeInBits();
7965 assert(Op
.getNumOperands() == 3 &&
7966 VT
== Op
.getOperand(1).getValueType() &&
7969 // Expand into a bunch of logical ops. Note that these ops
7970 // depend on the PPC behavior for oversized shift amounts.
7971 SDValue Lo
= Op
.getOperand(0);
7972 SDValue Hi
= Op
.getOperand(1);
7973 SDValue Amt
= Op
.getOperand(2);
7974 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt; Tmp5 = Amt - BitWidth (built as Amt + (-BitWidth)).
7976 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
7977 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
7978 SDValue Tmp2
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Amt
);
7979 SDValue Tmp3
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Tmp1
);
7980 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
7981 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
7982 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
7983 SDValue Tmp6
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Lo
, Tmp5
);
7984 SDValue OutHi
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp4
, Tmp6
);
7985 SDValue OutLo
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Lo
, Amt
);
7986 SDValue OutOps
[] = { OutLo
, OutHi
};
7987 return DAG
.getMergeValues(OutOps
, dl
);
// Lowers a double-width logical shift right whose operands are (Lo, Hi, Amt).
// With BitWidth the size of one part, the nodes built below compute
//   OutLo = (Lo >> Amt) | (Hi << (BitWidth - Amt)) | (Hi >> (Amt - BitWidth))
//   OutHi =  Hi >> Amt
// using PPCISD::SRL / PPCISD::SHL, and return {OutLo, OutHi} merged.
7990 SDValue
PPCTargetLowering::LowerSRL_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
7991 EVT VT
= Op
.getValueType();
7993 unsigned BitWidth
= VT
.getSizeInBits();
7994 assert(Op
.getNumOperands() == 3 &&
7995 VT
== Op
.getOperand(1).getValueType() &&
7998 // Expand into a bunch of logical ops. Note that these ops
7999 // depend on the PPC behavior for oversized shift amounts.
8000 SDValue Lo
= Op
.getOperand(0);
8001 SDValue Hi
= Op
.getOperand(1);
8002 SDValue Amt
= Op
.getOperand(2);
8003 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt; Tmp5 = Amt - BitWidth (built as Amt + (-BitWidth)).
8005 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
8006 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
8007 SDValue Tmp2
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Amt
);
8008 SDValue Tmp3
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Tmp1
);
8009 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
8010 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
8011 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
8012 SDValue Tmp6
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Hi
, Tmp5
);
8013 SDValue OutLo
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp4
, Tmp6
);
8014 SDValue OutHi
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Hi
, Amt
);
8015 SDValue OutOps
[] = { OutLo
, OutHi
};
8016 return DAG
.getMergeValues(OutOps
, dl
);
// Lowers a double-width arithmetic shift right whose operands are
// (Lo, Hi, Amt). With BitWidth the size of one part:
//   OutHi = Hi >>s Amt
//   OutLo = (Amt - BitWidth <= 0)
//             ? (Lo >> Amt) | (Hi << (BitWidth - Amt))
//             : Hi >>s (Amt - BitWidth)
// Unlike the SHL/SRL variants the two candidates for OutLo are not OR-ed
// together; a select_cc (SETLE against 0) chooses between them.
8019 SDValue
PPCTargetLowering::LowerSRA_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
8021 EVT VT
= Op
.getValueType();
8022 unsigned BitWidth
= VT
.getSizeInBits();
8023 assert(Op
.getNumOperands() == 3 &&
8024 VT
== Op
.getOperand(1).getValueType() &&
8027 // Expand into a bunch of logical ops, followed by a select_cc.
8028 SDValue Lo
= Op
.getOperand(0);
8029 SDValue Hi
= Op
.getOperand(1);
8030 SDValue Amt
= Op
.getOperand(2);
8031 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt; Tmp5 = Amt - BitWidth (built as Amt + (-BitWidth)).
8033 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
8034 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
8035 SDValue Tmp2
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Amt
);
8036 SDValue Tmp3
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Tmp1
);
8037 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
8038 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
8039 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
8040 SDValue Tmp6
= DAG
.getNode(PPCISD::SRA
, dl
, VT
, Hi
, Tmp5
);
8041 SDValue OutHi
= DAG
.getNode(PPCISD::SRA
, dl
, VT
, Hi
, Amt
);
8042 SDValue OutLo
= DAG
.getSelectCC(dl
, Tmp5
, DAG
.getConstant(0, dl
, AmtVT
),
8043 Tmp4
, Tmp6
, ISD::SETLE
);
8044 SDValue OutOps
[] = { OutLo
, OutHi
};
8045 return DAG
.getMergeValues(OutOps
, dl
);
8048 //===----------------------------------------------------------------------===//
8049 // Vector related lowering.
8052 /// BuildSplatI - Build a canonical splati of Val with an element size of
8053 /// SplatSize. Cast the result to VT.
// Val       - splat immediate; asserted to lie in [-16, 15].
// SplatSize - element size used to index VTys as SplatSize-1 (entries exist
//             for sizes 1, 2 and 4; the slot for size 3 is MVT::Other).
// VT        - requested result type; MVT::Other selects the canonical type
//             for SplatSize.
// Returns the constant splat, built in the canonical type and bitcast to the
// requested type.
8054 static SDValue
BuildSplatI(int Val
, unsigned SplatSize
, EVT VT
,
8055 SelectionDAG
&DAG
, const SDLoc
&dl
) {
8056 assert(Val
>= -16 && Val
<= 15 && "vsplti is out of range!");
8058 static const MVT VTys
[] = { // canonical VT to use for each size.
8059 MVT::v16i8
, MVT::v8i16
, MVT::Other
, MVT::v4i32
8062 EVT ReqVT
= VT
!= MVT::Other
? VT
: VTys
[SplatSize
-1];
8064 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8068 EVT CanonicalVT
= VTys
[SplatSize
-1];
8070 // Build a canonical splat for this value.
8071 return DAG
.getBitcast(ReqVT
, DAG
.getConstant(Val
, dl
, CanonicalVT
));
8074 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8075 /// specified intrinsic ID.
// Unary form. IID is passed as an i32 constant operand of an
// ISD::INTRINSIC_WO_CHAIN node; DestVT defaults to the type of Op when left
// as MVT::Other.
8076 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue Op
, SelectionDAG
&DAG
,
8077 const SDLoc
&dl
, EVT DestVT
= MVT::Other
) {
8078 if (DestVT
== MVT::Other
) DestVT
= Op
.getValueType();
8079 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8080 DAG
.getConstant(IID
, dl
, MVT::i32
), Op
);
8083 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8084 /// specified intrinsic ID.
// Binary form. IID is passed as an i32 constant operand of an
// ISD::INTRINSIC_WO_CHAIN node; DestVT defaults to the type of LHS when left
// as MVT::Other.
8085 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue LHS
, SDValue RHS
,
8086 SelectionDAG
&DAG
, const SDLoc
&dl
,
8087 EVT DestVT
= MVT::Other
) {
8088 if (DestVT
== MVT::Other
) DestVT
= LHS
.getValueType();
8089 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8090 DAG
.getConstant(IID
, dl
, MVT::i32
), LHS
, RHS
);
8093 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8094 /// specified intrinsic ID.
// Ternary form. IID is passed as an i32 constant operand of an
// ISD::INTRINSIC_WO_CHAIN node; DestVT defaults to the type of Op0 when left
// as MVT::Other.
8095 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue Op0
, SDValue Op1
,
8096 SDValue Op2
, SelectionDAG
&DAG
, const SDLoc
&dl
,
8097 EVT DestVT
= MVT::Other
) {
8098 if (DestVT
== MVT::Other
) DestVT
= Op0
.getValueType();
8099 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8100 DAG
.getConstant(IID
, dl
, MVT::i32
), Op0
, Op1
, Op2
);
8103 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8104 /// amount. The result has the specified value type.
// Both inputs are bitcast to v16i8, combined by a v16i8 vector shuffle whose
// 16-entry mask (Ops) is filled by the loop below, and the shuffle result is
// bitcast to VT.
8105 static SDValue
BuildVSLDOI(SDValue LHS
, SDValue RHS
, unsigned Amt
, EVT VT
,
8106 SelectionDAG
&DAG
, const SDLoc
&dl
) {
8107 // Force LHS/RHS to be the right type.
8108 LHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, LHS
);
8109 RHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, RHS
);
8112 for (unsigned i
= 0; i
!= 16; ++i
)
8114 SDValue T
= DAG
.getVectorShuffle(MVT::v16i8
, dl
, LHS
, RHS
, Ops
);
8115 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, T
);
8118 /// Do we have an efficient pattern in a .td file for this node?
8120 /// \param V - pointer to the BuildVectorSDNode being matched
8121 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8123 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8124 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8125 /// the opposite is true (expansion is beneficial) are:
8126 /// - The node builds a vector out of integers that are neither 32 nor 64 bits wide
8127 /// - The node builds a vector out of constants
8128 /// - The node is a "load-and-splat"
8129 /// In all other cases, we will choose to keep the BUILD_VECTOR.
// RightType accepts v2f64 unconditionally, v4f32 only with HasP8Vector, and
// v2i64 / v4i32 only with HasDirectMove. The operand scan then tracks two
// properties, IsSplat and IsLoad, and the final result is !(IsSplat && IsLoad).
8130 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode
*V
,
8133 EVT VecVT
= V
->getValueType(0);
8134 bool RightType
= VecVT
== MVT::v2f64
||
8135 (HasP8Vector
&& VecVT
== MVT::v4f32
) ||
8136 (HasDirectMove
&& (VecVT
== MVT::v2i64
|| VecVT
== MVT::v4i32
));
8140 bool IsSplat
= true;
8141 bool IsLoad
= false;
8142 SDValue Op0
= V
->getOperand(0);
8144 // This function is called in a block that confirms the node is not a constant
8145 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8146 // different constants.
8147 if (V
->isConstant())
8149 for (int i
= 0, e
= V
->getNumOperands(); i
< e
; ++i
) {
8150 if (V
->getOperand(i
).isUndef())
8152 // We want to expand nodes that represent load-and-splat even if the
8153 // loaded value is a floating point truncation or conversion to int.
// Matches an operand that is a LOAD, or an FP_ROUND / FP_TO_SINT / FP_TO_UINT
// whose own operand is a LOAD.
8154 if (V
->getOperand(i
).getOpcode() == ISD::LOAD
||
8155 (V
->getOperand(i
).getOpcode() == ISD::FP_ROUND
&&
8156 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
) ||
8157 (V
->getOperand(i
).getOpcode() == ISD::FP_TO_SINT
&&
8158 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
) ||
8159 (V
->getOperand(i
).getOpcode() == ISD::FP_TO_UINT
&&
8160 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
))
8162 // If the operands are different or the input is not a load and has more
8163 // uses than just this BV node, then it isn't a splat.
8164 if (V
->getOperand(i
) != Op0
||
8165 (!IsLoad
&& !V
->isOnlyUserOf(V
->getOperand(i
).getNode())))
// Only a splat of a loaded value yields false from this final test.
8168 return !(IsSplat
&& IsLoad
);
8171 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
// The guard below filters out every case except an f128 result (with
// EnableQuadPrecision set) whose operand is a BUILD_PAIR of two i64 values;
// that case is turned into a PPCISD::BUILD_FP128 node fed by the pair's
// operands.
8172 SDValue
PPCTargetLowering::LowerBITCAST(SDValue Op
, SelectionDAG
&DAG
) const {
8175 SDValue Op0
= Op
->getOperand(0);
8177 if (!EnableQuadPrecision
||
8178 (Op
.getValueType() != MVT::f128
) ||
8179 (Op0
.getOpcode() != ISD::BUILD_PAIR
) ||
8180 (Op0
.getOperand(0).getValueType() != MVT::i64
) ||
8181 (Op0
.getOperand(1).getValueType() != MVT::i64
))
8184 return DAG
.getNode(PPCISD::BUILD_FP128
, dl
, MVT::f128
, Op0
.getOperand(0),
8188 // If this is a case we can't handle, return null and let the default
8189 // expansion code take care of it. If we CAN select this case, and if it
8190 // selects to a single instruction, return Op. Otherwise, if we can codegen
8191 // this case more efficiently than a constant pool load, lower it to the
8192 // sequence of ops that should be used.
// Strategy visible in this function, in order:
//   1. QPX v4i1: constant/undef vectors are loaded from a constant-pool v4f32
//      built from the FP constants 1.0 and -1.0 (QVLFSb); otherwise the
//      elements are stored as i32 to a 16-byte stack slot, reloaded with
//      qvlfiwz, converted with qvfcfidu and compared against 0.0.
//   2. Vectors that are not constant splats of at most 32 bits are checked
//      with haveEfficientBuildVectorPattern when VSX is available.
//   3. Constant splats: zero, one-byte splats with P9 vector support, values
//      in [-16,15] via BuildSplatI, values in [-32,31] via PPCISD::VADD_SPLAT,
//      the 0x7FFFFFFF word splat, and finally the "vsplti + op self"
//      combinations (shl, srl, sra, rol, vsldoi by 1/2/3) driven by SplatCsts.
8193 SDValue
PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op
,
8194 SelectionDAG
&DAG
) const {
8196 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(Op
.getNode());
8197 assert(BVN
&& "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8199 if (Subtarget
.hasQPX() && Op
.getValueType() == MVT::v4i1
) {
8200 // We first build an i32 vector, load it into a QPX register,
8201 // then convert it to a floating-point vector and compare it
8202 // to a zero vector to get the boolean result.
8203 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
8204 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
8205 MachinePointerInfo PtrInfo
=
8206 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
8207 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
8208 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
8210 assert(BVN
->getNumOperands() == 4 &&
8211 "BUILD_VECTOR for v4i1 does not have 4 operands");
// IsConst starts true; the loop inspects every operand that is neither undef
// nor a ConstantSDNode.
8213 bool IsConst
= true;
8214 for (unsigned i
= 0; i
< 4; ++i
) {
8215 if (BVN
->getOperand(i
).isUndef()) continue;
8216 if (!isa
<ConstantSDNode
>(BVN
->getOperand(i
))) {
8224 ConstantFP::get(Type::getFloatTy(*DAG
.getContext()), 1.0);
8226 ConstantFP::get(Type::getFloatTy(*DAG
.getContext()), -1.0);
8229 for (unsigned i
= 0; i
< 4; ++i
) {
8230 if (BVN
->getOperand(i
).isUndef())
8231 CV
[i
] = UndefValue::get(Type::getFloatTy(*DAG
.getContext()));
8232 else if (isNullConstant(BVN
->getOperand(i
)))
8238 Constant
*CP
= ConstantVector::get(CV
);
8239 SDValue CPIdx
= DAG
.getConstantPool(CP
, getPointerTy(DAG
.getDataLayout()),
8240 16 /* alignment */);
8242 SDValue Ops
[] = {DAG
.getEntryNode(), CPIdx
};
8243 SDVTList VTs
= DAG
.getVTList({MVT::v4i1
, /*chain*/ MVT::Other
});
8244 return DAG
.getMemIntrinsicNode(
8245 PPCISD::QVLFSb
, dl
, VTs
, Ops
, MVT::v4f32
,
8246 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
// Store every defined element as an i32 at byte offset 4*i of the stack slot
// (a truncating store when the element's store size exceeds 4 bytes).
8249 SmallVector
<SDValue
, 4> Stores
;
8250 for (unsigned i
= 0; i
< 4; ++i
) {
8251 if (BVN
->getOperand(i
).isUndef()) continue;
8253 unsigned Offset
= 4*i
;
8254 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
8255 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
8257 unsigned StoreSize
= BVN
->getOperand(i
).getValueType().getStoreSize();
8258 if (StoreSize
> 4) {
8260 DAG
.getTruncStore(DAG
.getEntryNode(), dl
, BVN
->getOperand(i
), Idx
,
8261 PtrInfo
.getWithOffset(Offset
), MVT::i32
));
8263 SDValue StoreValue
= BVN
->getOperand(i
);
8265 StoreValue
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, StoreValue
);
8267 Stores
.push_back(DAG
.getStore(DAG
.getEntryNode(), dl
, StoreValue
, Idx
,
8268 PtrInfo
.getWithOffset(Offset
)));
// The stores are joined with a TokenFactor; with no stores the entry node is
// used as the chain instead.
8273 if (!Stores
.empty())
8274 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
8276 StoreChain
= DAG
.getEntryNode();
8278 // Now load from v4i32 into the QPX register; this will extend it to
8279 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8280 // is typed as v4f64 because the QPX register integer states are not
8281 // explicitly represented.
8283 SDValue Ops
[] = {StoreChain
,
8284 DAG
.getConstant(Intrinsic::ppc_qpx_qvlfiwz
, dl
, MVT::i32
),
8286 SDVTList VTs
= DAG
.getVTList({MVT::v4f64
, /*chain*/ MVT::Other
});
8288 SDValue LoadedVect
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
,
8289 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
8290 LoadedVect
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
8291 DAG
.getConstant(Intrinsic::ppc_qpx_qvfcfidu
, dl
, MVT::i32
),
8294 SDValue FPZeros
= DAG
.getConstantFP(0.0, dl
, MVT::v4f64
);
8296 return DAG
.getSetCC(dl
, MVT::v4i1
, LoadedVect
, FPZeros
, ISD::SETEQ
);
8299 // All other QPX vectors are handled by generic code.
8300 if (Subtarget
.hasQPX())
8303 // Check if this is a splat of a constant value.
8304 APInt APSplatBits
, APSplatUndef
;
8305 unsigned SplatBitSize
;
8307 if (! BVN
->isConstantSplat(APSplatBits
, APSplatUndef
, SplatBitSize
,
8308 HasAnyUndefs
, 0, !Subtarget
.isLittleEndian()) ||
8309 SplatBitSize
> 32) {
8310 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8311 // lowered to VSX instructions under certain conditions.
8312 // Without VSX, there is no pattern more efficient than expanding the node.
8313 if (Subtarget
.hasVSX() &&
8314 haveEfficientBuildVectorPattern(BVN
, Subtarget
.hasDirectMove(),
8315 Subtarget
.hasP8Vector()))
// From here on the node is a constant splat of at most 32 bits; SplatSize is
// the splat element size in bytes.
8320 unsigned SplatBits
= APSplatBits
.getZExtValue();
8321 unsigned SplatUndef
= APSplatUndef
.getZExtValue();
8322 unsigned SplatSize
= SplatBitSize
/ 8;
8324 // First, handle single instruction cases.
8327 if (SplatBits
== 0) {
8328 // Canonicalize all zero vectors to be v4i32.
8329 if (Op
.getValueType() != MVT::v4i32
|| HasAnyUndefs
) {
8330 SDValue Z
= DAG
.getConstant(0, dl
, MVT::v4i32
);
8331 Op
= DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Z
);
8336 // We have XXSPLTIB for constant splats one byte wide
8337 if (Subtarget
.hasP9Vector() && SplatSize
== 1) {
8338 // This is a splat of 1-byte elements with some elements potentially undef.
8339 // Rather than trying to match undef in the SDAG patterns, ensure that all
8340 // elements are the same constant.
8341 if (HasAnyUndefs
|| ISD::isBuildVectorAllOnes(BVN
)) {
8342 SmallVector
<SDValue
, 16> Ops(16, DAG
.getConstant(SplatBits
,
8344 SDValue NewBV
= DAG
.getBuildVector(MVT::v16i8
, dl
, Ops
);
8345 if (Op
.getValueType() != MVT::v16i8
)
8346 return DAG
.getBitcast(Op
.getValueType(), NewBV
);
8350 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8351 // detect that constant splats like v8i16: 0xABAB are really just splats
8352 // of a 1-byte constant. In this case, we need to convert the node to a
8353 // splat of v16i8 and a bitcast.
8354 if (Op
.getValueType() != MVT::v16i8
)
8355 return DAG
.getBitcast(Op
.getValueType(),
8356 DAG
.getConstant(SplatBits
, dl
, MVT::v16i8
));
8361 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8362 int32_t SextVal
= (int32_t(SplatBits
<< (32-SplatBitSize
)) >>
8364 if (SextVal
>= -16 && SextVal
<= 15)
8365 return BuildSplatI(SextVal
, SplatSize
, Op
.getValueType(), DAG
, dl
);
8367 // Two instruction sequences.
8369 // If this value is in the range [-32,30] and is even, use:
8370 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8371 // If this value is in the range [17,31] and is odd, use:
8372 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8373 // If this value is in the range [-31,-17] and is odd, use:
8374 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8375 // Note the last two are three-instruction sequences.
8376 if (SextVal
>= -32 && SextVal
<= 31) {
8377 // To avoid having these optimizations undone by constant folding,
8378 // we convert to a pseudo that will be expanded later into one of
8380 SDValue Elt
= DAG
.getConstant(SextVal
, dl
, MVT::i32
);
8381 EVT VT
= (SplatSize
== 1 ? MVT::v16i8
:
8382 (SplatSize
== 2 ? MVT::v8i16
: MVT::v4i32
));
8383 SDValue EltSize
= DAG
.getConstant(SplatSize
, dl
, MVT::i32
);
8384 SDValue RetVal
= DAG
.getNode(PPCISD::VADD_SPLAT
, dl
, VT
, Elt
, EltSize
);
8385 if (VT
== Op
.getValueType())
8388 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), RetVal
);
8391 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8392 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8394 if (SplatSize
== 4 && SplatBits
== (0x7FFFFFFF&~SplatUndef
)) {
8395 // Make -1 and vspltisw -1:
8396 SDValue OnesV
= BuildSplatI(-1, 4, MVT::v4i32
, DAG
, dl
);
8398 // Make the VSLW intrinsic, computing 0x8000_0000.
8399 SDValue Res
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw
, OnesV
,
8402 // xor by OnesV to invert it.
8403 Res
= DAG
.getNode(ISD::XOR
, dl
, MVT::v4i32
, Res
, OnesV
);
8404 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8407 // Check to see if this is a wide variety of vsplti*, binop self cases.
8408 static const signed char SplatCsts
[] = {
8409 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8410 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8413 for (unsigned idx
= 0; idx
< array_lengthof(SplatCsts
); ++idx
) {
8414 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8415 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
8416 int i
= SplatCsts
[idx
];
8418 // Figure out what shift amount will be used by altivec if shifted by i in
8420 unsigned TypeShiftAmt
= i
& (SplatBitSize
-1);
8422 // vsplti + shl self.
8423 if (SextVal
== (int)((unsigned)i
<< TypeShiftAmt
)) {
8424 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8425 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8426 Intrinsic::ppc_altivec_vslb
, Intrinsic::ppc_altivec_vslh
, 0,
8427 Intrinsic::ppc_altivec_vslw
8429 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8430 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8433 // vsplti + srl self.
8434 if (SextVal
== (int)((unsigned)i
>> TypeShiftAmt
)) {
8435 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8436 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8437 Intrinsic::ppc_altivec_vsrb
, Intrinsic::ppc_altivec_vsrh
, 0,
8438 Intrinsic::ppc_altivec_vsrw
8440 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8441 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8444 // vsplti + sra self.
// NOTE(review): this condition is textually identical to the "srl self" test
// above, which already returns on a match, so this branch looks unreachable.
// An arithmetic shift of i (e.g. (int)i >> TypeShiftAmt) may have been
// intended - confirm before changing.
8445 if (SextVal
== (int)((unsigned)i
>> TypeShiftAmt
)) {
8446 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8447 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8448 Intrinsic::ppc_altivec_vsrab
, Intrinsic::ppc_altivec_vsrah
, 0,
8449 Intrinsic::ppc_altivec_vsraw
8451 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8452 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8455 // vsplti + rol self.
8456 if (SextVal
== (int)(((unsigned)i
<< TypeShiftAmt
) |
8457 ((unsigned)i
>> (SplatBitSize
-TypeShiftAmt
)))) {
8458 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8459 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8460 Intrinsic::ppc_altivec_vrlb
, Intrinsic::ppc_altivec_vrlh
, 0,
8461 Intrinsic::ppc_altivec_vrlw
8463 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8464 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
// The three vsldoi cases below use shift amounts 1/2/3 on big-endian and
// 15/14/13 on little-endian subtargets.
8467 // t = vsplti c, result = vsldoi t, t, 1
8468 if (SextVal
== (int)(((unsigned)i
<< 8) | (i
< 0 ? 0xFF : 0))) {
8469 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8470 unsigned Amt
= Subtarget
.isLittleEndian() ? 15 : 1;
8471 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8473 // t = vsplti c, result = vsldoi t, t, 2
8474 if (SextVal
== (int)(((unsigned)i
<< 16) | (i
< 0 ? 0xFFFF : 0))) {
8475 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8476 unsigned Amt
= Subtarget
.isLittleEndian() ? 14 : 2;
8477 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8479 // t = vsplti c, result = vsldoi t, t, 3
8480 if (SextVal
== (int)(((unsigned)i
<< 24) | (i
< 0 ? 0xFFFFFF : 0))) {
8481 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8482 unsigned Amt
= Subtarget
.isLittleEndian() ? 13 : 3;
8483 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8490 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8491 /// the specified operations to build the shuffle.
8492 static SDValue
GeneratePerfectShuffle(unsigned PFEntry
, SDValue LHS
,
8493 SDValue RHS
, SelectionDAG
&DAG
,
8495 unsigned OpNum
= (PFEntry
>> 26) & 0x0F;
8496 unsigned LHSID
= (PFEntry
>> 13) & ((1 << 13)-1);
8497 unsigned RHSID
= (PFEntry
>> 0) & ((1 << 13)-1);
8500 OP_COPY
= 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8512 if (OpNum
== OP_COPY
) {
8513 if (LHSID
== (1*9+2)*9+3) return LHS
;
8514 assert(LHSID
== ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8518 SDValue OpLHS
, OpRHS
;
8519 OpLHS
= GeneratePerfectShuffle(PerfectShuffleTable
[LHSID
], LHS
, RHS
, DAG
, dl
);
8520 OpRHS
= GeneratePerfectShuffle(PerfectShuffleTable
[RHSID
], LHS
, RHS
, DAG
, dl
);
8524 default: llvm_unreachable("Unknown i32 permute!");
8526 ShufIdxs
[ 0] = 0; ShufIdxs
[ 1] = 1; ShufIdxs
[ 2] = 2; ShufIdxs
[ 3] = 3;
8527 ShufIdxs
[ 4] = 16; ShufIdxs
[ 5] = 17; ShufIdxs
[ 6] = 18; ShufIdxs
[ 7] = 19;
8528 ShufIdxs
[ 8] = 4; ShufIdxs
[ 9] = 5; ShufIdxs
[10] = 6; ShufIdxs
[11] = 7;
8529 ShufIdxs
[12] = 20; ShufIdxs
[13] = 21; ShufIdxs
[14] = 22; ShufIdxs
[15] = 23;
8532 ShufIdxs
[ 0] = 8; ShufIdxs
[ 1] = 9; ShufIdxs
[ 2] = 10; ShufIdxs
[ 3] = 11;
8533 ShufIdxs
[ 4] = 24; ShufIdxs
[ 5] = 25; ShufIdxs
[ 6] = 26; ShufIdxs
[ 7] = 27;
8534 ShufIdxs
[ 8] = 12; ShufIdxs
[ 9] = 13; ShufIdxs
[10] = 14; ShufIdxs
[11] = 15;
8535 ShufIdxs
[12] = 28; ShufIdxs
[13] = 29; ShufIdxs
[14] = 30; ShufIdxs
[15] = 31;
8538 for (unsigned i
= 0; i
!= 16; ++i
)
8539 ShufIdxs
[i
] = (i
&3)+0;
8542 for (unsigned i
= 0; i
!= 16; ++i
)
8543 ShufIdxs
[i
] = (i
&3)+4;
8546 for (unsigned i
= 0; i
!= 16; ++i
)
8547 ShufIdxs
[i
] = (i
&3)+8;
8550 for (unsigned i
= 0; i
!= 16; ++i
)
8551 ShufIdxs
[i
] = (i
&3)+12;
8554 return BuildVSLDOI(OpLHS
, OpRHS
, 4, OpLHS
.getValueType(), DAG
, dl
);
8556 return BuildVSLDOI(OpLHS
, OpRHS
, 8, OpLHS
.getValueType(), DAG
, dl
);
8558 return BuildVSLDOI(OpLHS
, OpRHS
, 12, OpLHS
.getValueType(), DAG
, dl
);
8560 EVT VT
= OpLHS
.getValueType();
8561 OpLHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OpLHS
);
8562 OpRHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OpRHS
);
8563 SDValue T
= DAG
.getVectorShuffle(MVT::v16i8
, dl
, OpLHS
, OpRHS
, ShufIdxs
);
8564 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, T
);
8567 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8568 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8570 SDValue
PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode
*N
,
8571 SelectionDAG
&DAG
) const {
8572 const unsigned BytesInVector
= 16;
8573 bool IsLE
= Subtarget
.isLittleEndian();
8575 SDValue V1
= N
->getOperand(0);
8576 SDValue V2
= N
->getOperand(1);
8577 unsigned ShiftElts
= 0, InsertAtByte
= 0;
8580 // Shifts required to get the byte we want at element 7.
8581 unsigned LittleEndianShifts
[] = {8, 7, 6, 5, 4, 3, 2, 1,
8582 0, 15, 14, 13, 12, 11, 10, 9};
8583 unsigned BigEndianShifts
[] = {9, 10, 11, 12, 13, 14, 15, 0,
8584 1, 2, 3, 4, 5, 6, 7, 8};
8586 ArrayRef
<int> Mask
= N
->getMask();
8587 int OriginalOrder
[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8589 // For each mask element, find out if we're just inserting something
8590 // from V2 into V1 or vice versa.
8591 // Possible permutations inserting an element from V2 into V1:
8592 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8593 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8595 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8596 // Inserting from V1 into V2 will be similar, except mask range will be
8599 bool FoundCandidate
= false;
8600 // If both vector operands for the shuffle are the same vector, the mask
8601 // will contain only elements from the first one and the second one will be
8603 unsigned VINSERTBSrcElem
= IsLE
? 8 : 7;
8604 // Go through the mask of half-words to find an element that's being moved
8605 // from one vector to the other.
8606 for (unsigned i
= 0; i
< BytesInVector
; ++i
) {
8607 unsigned CurrentElement
= Mask
[i
];
8608 // If 2nd operand is undefined, we should only look for element 7 in the
8610 if (V2
.isUndef() && CurrentElement
!= VINSERTBSrcElem
)
8613 bool OtherElementsInOrder
= true;
8614 // Examine the other elements in the Mask to see if they're in original
8616 for (unsigned j
= 0; j
< BytesInVector
; ++j
) {
8619 // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
8620 // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
8621 // in which we always assume we're always picking from the 1st operand.
8623 (!V2
.isUndef() && CurrentElement
< BytesInVector
) ? BytesInVector
: 0;
8624 if (Mask
[j
] != OriginalOrder
[j
] + MaskOffset
) {
8625 OtherElementsInOrder
= false;
8629 // If other elements are in original order, we record the number of shifts
8630 // we need to get the element we want into element 7. Also record which byte
8631 // in the vector we should insert into.
8632 if (OtherElementsInOrder
) {
8633 // If 2nd operand is undefined, we assume no shifts and no swapping.
8638 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8639 ShiftElts
= IsLE
? LittleEndianShifts
[CurrentElement
& 0xF]
8640 : BigEndianShifts
[CurrentElement
& 0xF];
8641 Swap
= CurrentElement
< BytesInVector
;
8643 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) : i
;
8644 FoundCandidate
= true;
8649 if (!FoundCandidate
)
8652 // Candidate found, construct the proper SDAG sequence with VINSERTB,
8653 // optionally with VECSHL if shift is required.
8659 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v16i8
, V2
, V2
,
8660 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8661 return DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v16i8
, V1
, Shl
,
8662 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8664 return DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v16i8
, V1
, V2
,
8665 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8668 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8669 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8671 SDValue
PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode
*N
,
8672 SelectionDAG
&DAG
) const {
8673 const unsigned NumHalfWords
= 8;
8674 const unsigned BytesInVector
= NumHalfWords
* 2;
8675 // Check that the shuffle is on half-words.
8676 if (!isNByteElemShuffleMask(N
, 2, 1))
8679 bool IsLE
= Subtarget
.isLittleEndian();
8681 SDValue V1
= N
->getOperand(0);
8682 SDValue V2
= N
->getOperand(1);
8683 unsigned ShiftElts
= 0, InsertAtByte
= 0;
8686 // Shifts required to get the half-word we want at element 3.
8687 unsigned LittleEndianShifts
[] = {4, 3, 2, 1, 0, 7, 6, 5};
8688 unsigned BigEndianShifts
[] = {5, 6, 7, 0, 1, 2, 3, 4};
8691 uint32_t OriginalOrderLow
= 0x1234567;
8692 uint32_t OriginalOrderHigh
= 0x89ABCDEF;
8693 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8694 // 32-bit space, only need 4-bit nibbles per element.
8695 for (unsigned i
= 0; i
< NumHalfWords
; ++i
) {
8696 unsigned MaskShift
= (NumHalfWords
- 1 - i
) * 4;
8697 Mask
|= ((uint32_t)(N
->getMaskElt(i
* 2) / 2) << MaskShift
);
8700 // For each mask element, find out if we're just inserting something
8701 // from V2 into V1 or vice versa. Possible permutations inserting an element
8703 // X, 1, 2, 3, 4, 5, 6, 7
8704 // 0, X, 2, 3, 4, 5, 6, 7
8705 // 0, 1, X, 3, 4, 5, 6, 7
8706 // 0, 1, 2, X, 4, 5, 6, 7
8707 // 0, 1, 2, 3, X, 5, 6, 7
8708 // 0, 1, 2, 3, 4, X, 6, 7
8709 // 0, 1, 2, 3, 4, 5, X, 7
8710 // 0, 1, 2, 3, 4, 5, 6, X
8711 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8713 bool FoundCandidate
= false;
8714 // Go through the mask of half-words to find an element that's being moved
8715 // from one vector to the other.
8716 for (unsigned i
= 0; i
< NumHalfWords
; ++i
) {
8717 unsigned MaskShift
= (NumHalfWords
- 1 - i
) * 4;
8718 uint32_t MaskOneElt
= (Mask
>> MaskShift
) & 0xF;
8719 uint32_t MaskOtherElts
= ~(0xF << MaskShift
);
8720 uint32_t TargetOrder
= 0x0;
8722 // If both vector operands for the shuffle are the same vector, the mask
8723 // will contain only elements from the first one and the second one will be
8727 unsigned VINSERTHSrcElem
= IsLE
? 4 : 3;
8728 TargetOrder
= OriginalOrderLow
;
8730 // Skip if not the correct element or mask of other elements don't equal
8731 // to our expected order.
8732 if (MaskOneElt
== VINSERTHSrcElem
&&
8733 (Mask
& MaskOtherElts
) == (TargetOrder
& MaskOtherElts
)) {
8734 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) * 2 : i
* 2;
8735 FoundCandidate
= true;
8738 } else { // If both operands are defined.
8739 // Target order is [8,15] if the current mask is between [0,7].
8741 (MaskOneElt
< NumHalfWords
) ? OriginalOrderHigh
: OriginalOrderLow
;
8742 // Skip if mask of other elements don't equal our expected order.
8743 if ((Mask
& MaskOtherElts
) == (TargetOrder
& MaskOtherElts
)) {
8744 // We only need the last 3 bits for the number of shifts.
8745 ShiftElts
= IsLE
? LittleEndianShifts
[MaskOneElt
& 0x7]
8746 : BigEndianShifts
[MaskOneElt
& 0x7];
8747 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) * 2 : i
* 2;
8748 Swap
= MaskOneElt
< NumHalfWords
;
8749 FoundCandidate
= true;
8755 if (!FoundCandidate
)
8758 // Candidate found, construct the proper SDAG sequence with VINSERTH,
8759 // optionally with VECSHL if shift is required.
8764 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V1
);
8766 // Double ShiftElts because we're left shifting on v16i8 type.
8767 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v16i8
, V2
, V2
,
8768 DAG
.getConstant(2 * ShiftElts
, dl
, MVT::i32
));
8769 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, Shl
);
8770 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v8i16
, Conv1
, Conv2
,
8771 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8772 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8774 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V2
);
8775 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v8i16
, Conv1
, Conv2
,
8776 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8777 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8780 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8781 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
8782 /// return the code it can be lowered into. Worst case, it can always be
8783 /// lowered into a vperm.
8784 SDValue
PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op
,
8785 SelectionDAG
&DAG
) const {
8787 SDValue V1
= Op
.getOperand(0);
8788 SDValue V2
= Op
.getOperand(1);
8789 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(Op
);
8790 EVT VT
= Op
.getValueType();
8791 bool isLittleEndian
= Subtarget
.isLittleEndian();
8793 unsigned ShiftElts
, InsertAtByte
;
8795 if (Subtarget
.hasP9Vector() &&
8796 PPC::isXXINSERTWMask(SVOp
, ShiftElts
, InsertAtByte
, Swap
,
8800 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8801 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V2
);
8803 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v4i32
, Conv2
, Conv2
,
8804 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8805 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v4i32
, Conv1
, Shl
,
8806 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8807 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8809 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v4i32
, Conv1
, Conv2
,
8810 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8811 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8814 if (Subtarget
.hasP9Altivec()) {
8816 if ((NewISDNode
= lowerToVINSERTH(SVOp
, DAG
)))
8819 if ((NewISDNode
= lowerToVINSERTB(SVOp
, DAG
)))
8823 if (Subtarget
.hasVSX() &&
8824 PPC::isXXSLDWIShuffleMask(SVOp
, ShiftElts
, Swap
, isLittleEndian
)) {
8827 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8829 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V2
.isUndef() ? V1
: V2
);
8831 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v4i32
, Conv1
, Conv2
,
8832 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8833 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Shl
);
8836 if (Subtarget
.hasVSX() &&
8837 PPC::isXXPERMDIShuffleMask(SVOp
, ShiftElts
, Swap
, isLittleEndian
)) {
8840 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V1
);
8842 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V2
.isUndef() ? V1
: V2
);
8844 SDValue PermDI
= DAG
.getNode(PPCISD::XXPERMDI
, dl
, MVT::v2i64
, Conv1
, Conv2
,
8845 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8846 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, PermDI
);
8849 if (Subtarget
.hasP9Vector()) {
8850 if (PPC::isXXBRHShuffleMask(SVOp
)) {
8851 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V1
);
8852 SDValue ReveHWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v8i16
, Conv
);
8853 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveHWord
);
8854 } else if (PPC::isXXBRWShuffleMask(SVOp
)) {
8855 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8856 SDValue ReveWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v4i32
, Conv
);
8857 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveWord
);
8858 } else if (PPC::isXXBRDShuffleMask(SVOp
)) {
8859 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V1
);
8860 SDValue ReveDWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v2i64
, Conv
);
8861 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveDWord
);
8862 } else if (PPC::isXXBRQShuffleMask(SVOp
)) {
8863 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v1i128
, V1
);
8864 SDValue ReveQWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v1i128
, Conv
);
8865 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveQWord
);
8869 if (Subtarget
.hasVSX()) {
8870 if (V2
.isUndef() && PPC::isSplatShuffleMask(SVOp
, 4)) {
8871 int SplatIdx
= PPC::getVSPLTImmediate(SVOp
, 4, DAG
);
8873 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8874 SDValue Splat
= DAG
.getNode(PPCISD::XXSPLT
, dl
, MVT::v4i32
, Conv
,
8875 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8876 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Splat
);
8879 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8880 if (V2
.isUndef() && PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) == 8) {
8881 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2f64
, V1
);
8882 SDValue Swap
= DAG
.getNode(PPCISD::SWAP_NO_CHAIN
, dl
, MVT::v2f64
, Conv
);
8883 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Swap
);
8887 if (Subtarget
.hasQPX()) {
8888 if (VT
.getVectorNumElements() != 4)
8891 if (V2
.isUndef()) V2
= V1
;
8893 int AlignIdx
= PPC::isQVALIGNIShuffleMask(SVOp
);
8894 if (AlignIdx
!= -1) {
8895 return DAG
.getNode(PPCISD::QVALIGNI
, dl
, VT
, V1
, V2
,
8896 DAG
.getConstant(AlignIdx
, dl
, MVT::i32
));
8897 } else if (SVOp
->isSplat()) {
8898 int SplatIdx
= SVOp
->getSplatIndex();
8899 if (SplatIdx
>= 4) {
8904 return DAG
.getNode(PPCISD::QVESPLATI
, dl
, VT
, V1
,
8905 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8908 // Lower this into a qvgpci/qvfperm pair.
8910 // Compute the qvgpci literal
8912 for (unsigned i
= 0; i
< 4; ++i
) {
8913 int m
= SVOp
->getMaskElt(i
);
8914 unsigned mm
= m
>= 0 ? (unsigned) m
: i
;
8915 idx
|= mm
<< (3-i
)*3;
8918 SDValue V3
= DAG
.getNode(PPCISD::QVGPCI
, dl
, MVT::v4f64
,
8919 DAG
.getConstant(idx
, dl
, MVT::i32
));
8920 return DAG
.getNode(PPCISD::QVFPERM
, dl
, VT
, V1
, V2
, V3
);
8923 // Cases that are handled by instructions that take permute immediates
8924 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8925 // selected by the instruction selector.
8927 if (PPC::isSplatShuffleMask(SVOp
, 1) ||
8928 PPC::isSplatShuffleMask(SVOp
, 2) ||
8929 PPC::isSplatShuffleMask(SVOp
, 4) ||
8930 PPC::isVPKUWUMShuffleMask(SVOp
, 1, DAG
) ||
8931 PPC::isVPKUHUMShuffleMask(SVOp
, 1, DAG
) ||
8932 PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) != -1 ||
8933 PPC::isVMRGLShuffleMask(SVOp
, 1, 1, DAG
) ||
8934 PPC::isVMRGLShuffleMask(SVOp
, 2, 1, DAG
) ||
8935 PPC::isVMRGLShuffleMask(SVOp
, 4, 1, DAG
) ||
8936 PPC::isVMRGHShuffleMask(SVOp
, 1, 1, DAG
) ||
8937 PPC::isVMRGHShuffleMask(SVOp
, 2, 1, DAG
) ||
8938 PPC::isVMRGHShuffleMask(SVOp
, 4, 1, DAG
) ||
8939 (Subtarget
.hasP8Altivec() && (
8940 PPC::isVPKUDUMShuffleMask(SVOp
, 1, DAG
) ||
8941 PPC::isVMRGEOShuffleMask(SVOp
, true, 1, DAG
) ||
8942 PPC::isVMRGEOShuffleMask(SVOp
, false, 1, DAG
)))) {
8947 // Altivec has a variety of "shuffle immediates" that take two vector inputs
8948 // and produce a fixed permutation. If any of these match, do not lower to
8950 unsigned int ShuffleKind
= isLittleEndian
? 2 : 0;
8951 if (PPC::isVPKUWUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8952 PPC::isVPKUHUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8953 PPC::isVSLDOIShuffleMask(SVOp
, ShuffleKind
, DAG
) != -1 ||
8954 PPC::isVMRGLShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
8955 PPC::isVMRGLShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
8956 PPC::isVMRGLShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
8957 PPC::isVMRGHShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
8958 PPC::isVMRGHShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
8959 PPC::isVMRGHShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
8960 (Subtarget
.hasP8Altivec() && (
8961 PPC::isVPKUDUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8962 PPC::isVMRGEOShuffleMask(SVOp
, true, ShuffleKind
, DAG
) ||
8963 PPC::isVMRGEOShuffleMask(SVOp
, false, ShuffleKind
, DAG
))))
8966 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8967 // perfect shuffle table to emit an optimal matching sequence.
8968 ArrayRef
<int> PermMask
= SVOp
->getMask();
8970 unsigned PFIndexes
[4];
8971 bool isFourElementShuffle
= true;
8972 for (unsigned i
= 0; i
!= 4 && isFourElementShuffle
; ++i
) { // Element number
8973 unsigned EltNo
= 8; // Start out undef.
8974 for (unsigned j
= 0; j
!= 4; ++j
) { // Intra-element byte.
8975 if (PermMask
[i
*4+j
] < 0)
8976 continue; // Undef, ignore it.
8978 unsigned ByteSource
= PermMask
[i
*4+j
];
8979 if ((ByteSource
& 3) != j
) {
8980 isFourElementShuffle
= false;
8985 EltNo
= ByteSource
/4;
8986 } else if (EltNo
!= ByteSource
/4) {
8987 isFourElementShuffle
= false;
8991 PFIndexes
[i
] = EltNo
;
8994 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8995 // perfect shuffle vector to determine if it is cost effective to do this as
8996 // discrete instructions, or whether we should use a vperm.
8997 // For now, we skip this for little endian until such time as we have a
8998 // little-endian perfect shuffle table.
8999 if (isFourElementShuffle
&& !isLittleEndian
) {
9000 // Compute the index in the perfect shuffle table.
9001 unsigned PFTableIndex
=
9002 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
9004 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
9005 unsigned Cost
= (PFEntry
>> 30);
9007 // Determining when to avoid vperm is tricky. Many things affect the cost
9008 // of vperm, particularly how many times the perm mask needs to be computed.
9009 // For example, if the perm mask can be hoisted out of a loop or is already
9010 // used (perhaps because there are multiple permutes with the same shuffle
9011 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9012 // the loop requires an extra register.
9014 // As a compromise, we only emit discrete instructions if the shuffle can be
9015 // generated in 3 or fewer operations. When we have loop information
9016 // available, if this block is within a loop, we should avoid using vperm
9017 // for 3-operation perms and use a constant pool load instead.
9019 return GeneratePerfectShuffle(PFEntry
, V1
, V2
, DAG
, dl
);
9022 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9023 // vector that will get spilled to the constant pool.
9024 if (V2
.isUndef()) V2
= V1
;
9026 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9027 // that it is in input element units, not in bytes. Convert now.
9029 // For little endian, the order of the input vectors is reversed, and
9030 // the permutation mask is complemented with respect to 31. This is
9031 // necessary to produce proper semantics with the big-endian-biased vperm
9033 EVT EltVT
= V1
.getValueType().getVectorElementType();
9034 unsigned BytesPerElement
= EltVT
.getSizeInBits()/8;
9036 SmallVector
<SDValue
, 16> ResultMask
;
9037 for (unsigned i
= 0, e
= VT
.getVectorNumElements(); i
!= e
; ++i
) {
9038 unsigned SrcElt
= PermMask
[i
] < 0 ? 0 : PermMask
[i
];
9040 for (unsigned j
= 0; j
!= BytesPerElement
; ++j
)
9042 ResultMask
.push_back(DAG
.getConstant(31 - (SrcElt
*BytesPerElement
+ j
),
9045 ResultMask
.push_back(DAG
.getConstant(SrcElt
*BytesPerElement
+ j
, dl
,
9049 SDValue VPermMask
= DAG
.getBuildVector(MVT::v16i8
, dl
, ResultMask
);
9051 return DAG
.getNode(PPCISD::VPERM
, dl
, V1
.getValueType(),
9054 return DAG
.getNode(PPCISD::VPERM
, dl
, V1
.getValueType(),
9058 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9059 /// vector comparison. If it is, return true and fill in Opc/isDot with
9060 /// information about the intrinsic.
9061 static bool getVectorCompareInfo(SDValue Intrin
, int &CompareOpc
,
9062 bool &isDot
, const PPCSubtarget
&Subtarget
) {
9063 unsigned IntrinsicID
=
9064 cast
<ConstantSDNode
>(Intrin
.getOperand(0))->getZExtValue();
9067 switch (IntrinsicID
) {
9070 // Comparison predicates.
9071 case Intrinsic::ppc_altivec_vcmpbfp_p
:
9075 case Intrinsic::ppc_altivec_vcmpeqfp_p
:
9079 case Intrinsic::ppc_altivec_vcmpequb_p
:
9083 case Intrinsic::ppc_altivec_vcmpequh_p
:
9087 case Intrinsic::ppc_altivec_vcmpequw_p
:
9091 case Intrinsic::ppc_altivec_vcmpequd_p
:
9092 if (Subtarget
.hasP8Altivec()) {
9098 case Intrinsic::ppc_altivec_vcmpneb_p
:
9099 case Intrinsic::ppc_altivec_vcmpneh_p
:
9100 case Intrinsic::ppc_altivec_vcmpnew_p
:
9101 case Intrinsic::ppc_altivec_vcmpnezb_p
:
9102 case Intrinsic::ppc_altivec_vcmpnezh_p
:
9103 case Intrinsic::ppc_altivec_vcmpnezw_p
:
9104 if (Subtarget
.hasP9Altivec()) {
9105 switch (IntrinsicID
) {
9107 llvm_unreachable("Unknown comparison intrinsic.");
9108 case Intrinsic::ppc_altivec_vcmpneb_p
:
9111 case Intrinsic::ppc_altivec_vcmpneh_p
:
9114 case Intrinsic::ppc_altivec_vcmpnew_p
:
9117 case Intrinsic::ppc_altivec_vcmpnezb_p
:
9120 case Intrinsic::ppc_altivec_vcmpnezh_p
:
9123 case Intrinsic::ppc_altivec_vcmpnezw_p
:
9131 case Intrinsic::ppc_altivec_vcmpgefp_p
:
9135 case Intrinsic::ppc_altivec_vcmpgtfp_p
:
9139 case Intrinsic::ppc_altivec_vcmpgtsb_p
:
9143 case Intrinsic::ppc_altivec_vcmpgtsh_p
:
9147 case Intrinsic::ppc_altivec_vcmpgtsw_p
:
9151 case Intrinsic::ppc_altivec_vcmpgtsd_p
:
9152 if (Subtarget
.hasP8Altivec()) {
9158 case Intrinsic::ppc_altivec_vcmpgtub_p
:
9162 case Intrinsic::ppc_altivec_vcmpgtuh_p
:
9166 case Intrinsic::ppc_altivec_vcmpgtuw_p
:
9170 case Intrinsic::ppc_altivec_vcmpgtud_p
:
9171 if (Subtarget
.hasP8Altivec()) {
9178 // VSX predicate comparisons use the same infrastructure
9179 case Intrinsic::ppc_vsx_xvcmpeqdp_p
:
9180 case Intrinsic::ppc_vsx_xvcmpgedp_p
:
9181 case Intrinsic::ppc_vsx_xvcmpgtdp_p
:
9182 case Intrinsic::ppc_vsx_xvcmpeqsp_p
:
9183 case Intrinsic::ppc_vsx_xvcmpgesp_p
:
9184 case Intrinsic::ppc_vsx_xvcmpgtsp_p
:
9185 if (Subtarget
.hasVSX()) {
9186 switch (IntrinsicID
) {
9187 case Intrinsic::ppc_vsx_xvcmpeqdp_p
:
9190 case Intrinsic::ppc_vsx_xvcmpgedp_p
:
9193 case Intrinsic::ppc_vsx_xvcmpgtdp_p
:
9196 case Intrinsic::ppc_vsx_xvcmpeqsp_p
:
9199 case Intrinsic::ppc_vsx_xvcmpgesp_p
:
9202 case Intrinsic::ppc_vsx_xvcmpgtsp_p
:
9211 // Normal Comparisons.
9212 case Intrinsic::ppc_altivec_vcmpbfp
:
9215 case Intrinsic::ppc_altivec_vcmpeqfp
:
9218 case Intrinsic::ppc_altivec_vcmpequb
:
9221 case Intrinsic::ppc_altivec_vcmpequh
:
9224 case Intrinsic::ppc_altivec_vcmpequw
:
9227 case Intrinsic::ppc_altivec_vcmpequd
:
9228 if (Subtarget
.hasP8Altivec())
9233 case Intrinsic::ppc_altivec_vcmpneb
:
9234 case Intrinsic::ppc_altivec_vcmpneh
:
9235 case Intrinsic::ppc_altivec_vcmpnew
:
9236 case Intrinsic::ppc_altivec_vcmpnezb
:
9237 case Intrinsic::ppc_altivec_vcmpnezh
:
9238 case Intrinsic::ppc_altivec_vcmpnezw
:
9239 if (Subtarget
.hasP9Altivec())
9240 switch (IntrinsicID
) {
9242 llvm_unreachable("Unknown comparison intrinsic.");
9243 case Intrinsic::ppc_altivec_vcmpneb
:
9246 case Intrinsic::ppc_altivec_vcmpneh
:
9249 case Intrinsic::ppc_altivec_vcmpnew
:
9252 case Intrinsic::ppc_altivec_vcmpnezb
:
9255 case Intrinsic::ppc_altivec_vcmpnezh
:
9258 case Intrinsic::ppc_altivec_vcmpnezw
:
9265 case Intrinsic::ppc_altivec_vcmpgefp
:
9268 case Intrinsic::ppc_altivec_vcmpgtfp
:
9271 case Intrinsic::ppc_altivec_vcmpgtsb
:
9274 case Intrinsic::ppc_altivec_vcmpgtsh
:
9277 case Intrinsic::ppc_altivec_vcmpgtsw
:
9280 case Intrinsic::ppc_altivec_vcmpgtsd
:
9281 if (Subtarget
.hasP8Altivec())
9286 case Intrinsic::ppc_altivec_vcmpgtub
:
9289 case Intrinsic::ppc_altivec_vcmpgtuh
:
9292 case Intrinsic::ppc_altivec_vcmpgtuw
:
9295 case Intrinsic::ppc_altivec_vcmpgtud
:
9296 if (Subtarget
.hasP8Altivec())
9305 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9306 /// lower, do it, otherwise return null.
9307 SDValue
PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
,
9308 SelectionDAG
&DAG
) const {
9309 unsigned IntrinsicID
=
9310 cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
9314 if (IntrinsicID
== Intrinsic::thread_pointer
) {
9315 // Reads the thread pointer register, used for __builtin_thread_pointer.
9316 if (Subtarget
.isPPC64())
9317 return DAG
.getRegister(PPC::X13
, MVT::i64
);
9318 return DAG
.getRegister(PPC::R2
, MVT::i32
);
9321 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9322 // opcode number of the comparison.
9325 if (!getVectorCompareInfo(Op
, CompareOpc
, isDot
, Subtarget
))
9326 return SDValue(); // Don't custom lower most intrinsics.
9328 // If this is a non-dot comparison, make the VCMP node and we are done.
9330 SDValue Tmp
= DAG
.getNode(PPCISD::VCMP
, dl
, Op
.getOperand(2).getValueType(),
9331 Op
.getOperand(1), Op
.getOperand(2),
9332 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
));
9333 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Tmp
);
9336 // Create the PPCISD altivec 'dot' comparison node.
9338 Op
.getOperand(2), // LHS
9339 Op
.getOperand(3), // RHS
9340 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
)
9342 EVT VTs
[] = { Op
.getOperand(2).getValueType(), MVT::Glue
};
9343 SDValue CompNode
= DAG
.getNode(PPCISD::VCMPo
, dl
, VTs
, Ops
);
9345 // Now that we have the comparison, emit a copy from the CR to a GPR.
9346 // This is flagged to the above dot comparison.
9347 SDValue Flags
= DAG
.getNode(PPCISD::MFOCRF
, dl
, MVT::i32
,
9348 DAG
.getRegister(PPC::CR6
, MVT::i32
),
9349 CompNode
.getValue(1));
9351 // Unpack the result based on how the target uses it.
9352 unsigned BitNo
; // Bit # of CR6.
9353 bool InvertBit
; // Invert result?
9354 switch (cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue()) {
9355 default: // Can't happen, don't crash on invalid number though.
9356 case 0: // Return the value of the EQ bit of CR6.
9357 BitNo
= 0; InvertBit
= false;
9359 case 1: // Return the inverted value of the EQ bit of CR6.
9360 BitNo
= 0; InvertBit
= true;
9362 case 2: // Return the value of the LT bit of CR6.
9363 BitNo
= 2; InvertBit
= false;
9365 case 3: // Return the inverted value of the LT bit of CR6.
9366 BitNo
= 2; InvertBit
= true;
9370 // Shift the bit into the low position.
9371 Flags
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
, Flags
,
9372 DAG
.getConstant(8 - (3 - BitNo
), dl
, MVT::i32
));
9374 Flags
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, Flags
,
9375 DAG
.getConstant(1, dl
, MVT::i32
));
9377 // If we are supposed to, toggle the bit.
9379 Flags
= DAG
.getNode(ISD::XOR
, dl
, MVT::i32
, Flags
,
9380 DAG
.getConstant(1, dl
, MVT::i32
));
9384 SDValue
PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op
,
9385 SelectionDAG
&DAG
) const {
9386 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9387 // the beginning of the argument list.
9388 int ArgStart
= isa
<ConstantSDNode
>(Op
.getOperand(0)) ? 0 : 1;
9390 switch (cast
<ConstantSDNode
>(Op
.getOperand(ArgStart
))->getZExtValue()) {
9391 case Intrinsic::ppc_cfence
: {
9392 assert(ArgStart
== 1 && "llvm.ppc.cfence must carry a chain argument.");
9393 assert(Subtarget
.isPPC64() && "Only 64-bit is supported for now.");
9394 return SDValue(DAG
.getMachineNode(PPC::CFENCE8
, DL
, MVT::Other
,
9395 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
,
9396 Op
.getOperand(ArgStart
+ 1)),
9406 SDValue
PPCTargetLowering::LowerREM(SDValue Op
, SelectionDAG
&DAG
) const {
9407 // Check for a DIV with the same operands as this REM.
9408 for (auto UI
: Op
.getOperand(1)->uses()) {
9409 if ((Op
.getOpcode() == ISD::SREM
&& UI
->getOpcode() == ISD::SDIV
) ||
9410 (Op
.getOpcode() == ISD::UREM
&& UI
->getOpcode() == ISD::UDIV
))
9411 if (UI
->getOperand(0) == Op
.getOperand(0) &&
9412 UI
->getOperand(1) == Op
.getOperand(1))
9418 // Lower scalar BSWAP64 to xxbrd.
9419 SDValue
PPCTargetLowering::LowerBSWAP(SDValue Op
, SelectionDAG
&DAG
) const {
9422 Op
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v2i64
, Op
.getOperand(0),
9425 Op
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v2i64
, Op
);
9427 int VectorIndex
= 0;
9428 if (Subtarget
.isLittleEndian())
9430 Op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::i64
, Op
,
9431 DAG
.getTargetConstant(VectorIndex
, dl
, MVT::i32
));
9435 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9436 // compared to a value that is atomically loaded (atomic loads zero-extend).
9437 SDValue
PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op
,
9438 SelectionDAG
&DAG
) const {
9439 assert(Op
.getOpcode() == ISD::ATOMIC_CMP_SWAP
&&
9440 "Expecting an atomic compare-and-swap here.");
9442 auto *AtomicNode
= cast
<AtomicSDNode
>(Op
.getNode());
9443 EVT MemVT
= AtomicNode
->getMemoryVT();
9444 if (MemVT
.getSizeInBits() >= 32)
9447 SDValue CmpOp
= Op
.getOperand(2);
9448 // If this is already correctly zero-extended, leave it alone.
9449 auto HighBits
= APInt::getHighBitsSet(32, 32 - MemVT
.getSizeInBits());
9450 if (DAG
.MaskedValueIsZero(CmpOp
, HighBits
))
9453 // Clear the high bits of the compare operand.
9454 unsigned MaskVal
= (1 << MemVT
.getSizeInBits()) - 1;
9456 DAG
.getNode(ISD::AND
, dl
, MVT::i32
, CmpOp
,
9457 DAG
.getConstant(MaskVal
, dl
, MVT::i32
));
9459 // Replace the existing compare operand with the properly zero-extended one.
9460 SmallVector
<SDValue
, 4> Ops
;
9461 for (int i
= 0, e
= AtomicNode
->getNumOperands(); i
< e
; i
++)
9462 Ops
.push_back(AtomicNode
->getOperand(i
));
9464 MachineMemOperand
*MMO
= AtomicNode
->getMemOperand();
9465 SDVTList Tys
= DAG
.getVTList(MVT::i32
, MVT::Other
);
9467 (MemVT
== MVT::i8
) ? PPCISD::ATOMIC_CMP_SWAP_8
: PPCISD::ATOMIC_CMP_SWAP_16
;
9468 return DAG
.getMemIntrinsicNode(NodeTy
, dl
, Tys
, Ops
, MemVT
, MMO
);
9471 SDValue
PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op
,
9472 SelectionDAG
&DAG
) const {
9474 // Create a stack slot that is 16-byte aligned.
9475 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9476 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9477 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9478 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9480 // Store the input value into Value#0 of the stack slot.
9481 SDValue Store
= DAG
.getStore(DAG
.getEntryNode(), dl
, Op
.getOperand(0), FIdx
,
9482 MachinePointerInfo());
9484 return DAG
.getLoad(Op
.getValueType(), dl
, Store
, FIdx
, MachinePointerInfo());
9487 SDValue
PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
9488 SelectionDAG
&DAG
) const {
9489 assert(Op
.getOpcode() == ISD::INSERT_VECTOR_ELT
&&
9490 "Should only be called for ISD::INSERT_VECTOR_ELT");
9492 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
9493 // We have legal lowering for constant indices but not for variable ones.
9497 EVT VT
= Op
.getValueType();
9499 SDValue V1
= Op
.getOperand(0);
9500 SDValue V2
= Op
.getOperand(1);
9501 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9502 if (VT
== MVT::v8i16
|| VT
== MVT::v16i8
) {
9503 SDValue Mtvsrz
= DAG
.getNode(PPCISD::MTVSRZ
, dl
, VT
, V2
);
9504 unsigned BytesInEachElement
= VT
.getVectorElementType().getSizeInBits() / 8;
9505 unsigned InsertAtElement
= C
->getZExtValue();
9506 unsigned InsertAtByte
= InsertAtElement
* BytesInEachElement
;
9507 if (Subtarget
.isLittleEndian()) {
9508 InsertAtByte
= (16 - BytesInEachElement
) - InsertAtByte
;
9510 return DAG
.getNode(PPCISD::VECINSERT
, dl
, VT
, V1
, Mtvsrz
,
9511 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
9516 SDValue
PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
9517 SelectionDAG
&DAG
) const {
9519 SDNode
*N
= Op
.getNode();
9521 assert(N
->getOperand(0).getValueType() == MVT::v4i1
&&
9522 "Unknown extract_vector_elt type");
9524 SDValue Value
= N
->getOperand(0);
9526 // The first part of this is like the store lowering except that we don't
9527 // need to track the chain.
9529 // The values are now known to be -1 (false) or 1 (true). To convert this
9530 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9531 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9532 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
9534 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9535 // understand how to form the extending load.
9536 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
9538 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
9540 // Now convert to an integer and store.
9541 Value
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
9542 DAG
.getConstant(Intrinsic::ppc_qpx_qvfctiwu
, dl
, MVT::i32
),
9545 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9546 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9547 MachinePointerInfo PtrInfo
=
9548 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
9549 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9550 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9552 SDValue StoreChain
= DAG
.getEntryNode();
9553 SDValue Ops
[] = {StoreChain
,
9554 DAG
.getConstant(Intrinsic::ppc_qpx_qvstfiw
, dl
, MVT::i32
),
9556 SDVTList VTs
= DAG
.getVTList(/*chain*/ MVT::Other
);
9558 StoreChain
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
,
9559 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
9561 // Extract the value requested.
9562 unsigned Offset
= 4*cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
9563 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
9564 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
9567 DAG
.getLoad(MVT::i32
, dl
, StoreChain
, Idx
, PtrInfo
.getWithOffset(Offset
));
9569 if (!Subtarget
.useCRBits())
9572 return DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, IntVal
);
9575 /// Lowering for QPX v4i1 loads
9576 SDValue
PPCTargetLowering::LowerVectorLoad(SDValue Op
,
9577 SelectionDAG
&DAG
) const {
9579 LoadSDNode
*LN
= cast
<LoadSDNode
>(Op
.getNode());
9580 SDValue LoadChain
= LN
->getChain();
9581 SDValue BasePtr
= LN
->getBasePtr();
9583 if (Op
.getValueType() == MVT::v4f64
||
9584 Op
.getValueType() == MVT::v4f32
) {
9585 EVT MemVT
= LN
->getMemoryVT();
9586 unsigned Alignment
= LN
->getAlignment();
9588 // If this load is properly aligned, then it is legal.
9589 if (Alignment
>= MemVT
.getStoreSize())
9592 EVT ScalarVT
= Op
.getValueType().getScalarType(),
9593 ScalarMemVT
= MemVT
.getScalarType();
9594 unsigned Stride
= ScalarMemVT
.getStoreSize();
9596 SDValue Vals
[4], LoadChains
[4];
9597 for (unsigned Idx
= 0; Idx
< 4; ++Idx
) {
9599 if (ScalarVT
!= ScalarMemVT
)
9600 Load
= DAG
.getExtLoad(LN
->getExtensionType(), dl
, ScalarVT
, LoadChain
,
9602 LN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9603 ScalarMemVT
, MinAlign(Alignment
, Idx
* Stride
),
9604 LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9606 Load
= DAG
.getLoad(ScalarVT
, dl
, LoadChain
, BasePtr
,
9607 LN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9608 MinAlign(Alignment
, Idx
* Stride
),
9609 LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9611 if (Idx
== 0 && LN
->isIndexed()) {
9612 assert(LN
->getAddressingMode() == ISD::PRE_INC
&&
9613 "Unknown addressing mode on vector load");
9614 Load
= DAG
.getIndexedLoad(Load
, dl
, BasePtr
, LN
->getOffset(),
9615 LN
->getAddressingMode());
9619 LoadChains
[Idx
] = Load
.getValue(1);
9621 BasePtr
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
9622 DAG
.getConstant(Stride
, dl
,
9623 BasePtr
.getValueType()));
9626 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, LoadChains
);
9627 SDValue Value
= DAG
.getBuildVector(Op
.getValueType(), dl
, Vals
);
9629 if (LN
->isIndexed()) {
9630 SDValue RetOps
[] = { Value
, Vals
[0].getValue(1), TF
};
9631 return DAG
.getMergeValues(RetOps
, dl
);
9634 SDValue RetOps
[] = { Value
, TF
};
9635 return DAG
.getMergeValues(RetOps
, dl
);
9638 assert(Op
.getValueType() == MVT::v4i1
&& "Unknown load to lower");
9639 assert(LN
->isUnindexed() && "Indexed v4i1 loads are not supported");
9641 // To lower v4i1 from a byte array, we load the byte elements of the
9642 // vector and then reuse the BUILD_VECTOR logic.
9644 SDValue VectElmts
[4], VectElmtChains
[4];
9645 for (unsigned i
= 0; i
< 4; ++i
) {
9646 SDValue Idx
= DAG
.getConstant(i
, dl
, BasePtr
.getValueType());
9647 Idx
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
, Idx
);
9649 VectElmts
[i
] = DAG
.getExtLoad(
9650 ISD::EXTLOAD
, dl
, MVT::i32
, LoadChain
, Idx
,
9651 LN
->getPointerInfo().getWithOffset(i
), MVT::i8
,
9652 /* Alignment = */ 1, LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9653 VectElmtChains
[i
] = VectElmts
[i
].getValue(1);
9656 LoadChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, VectElmtChains
);
9657 SDValue Value
= DAG
.getBuildVector(MVT::v4i1
, dl
, VectElmts
);
9659 SDValue RVals
[] = { Value
, LoadChain
};
9660 return DAG
.getMergeValues(RVals
, dl
);
9663 /// Lowering for QPX v4i1 stores
9664 SDValue
PPCTargetLowering::LowerVectorStore(SDValue Op
,
9665 SelectionDAG
&DAG
) const {
9667 StoreSDNode
*SN
= cast
<StoreSDNode
>(Op
.getNode());
9668 SDValue StoreChain
= SN
->getChain();
9669 SDValue BasePtr
= SN
->getBasePtr();
9670 SDValue Value
= SN
->getValue();
9672 if (Value
.getValueType() == MVT::v4f64
||
9673 Value
.getValueType() == MVT::v4f32
) {
9674 EVT MemVT
= SN
->getMemoryVT();
9675 unsigned Alignment
= SN
->getAlignment();
9677 // If this store is properly aligned, then it is legal.
9678 if (Alignment
>= MemVT
.getStoreSize())
9681 EVT ScalarVT
= Value
.getValueType().getScalarType(),
9682 ScalarMemVT
= MemVT
.getScalarType();
9683 unsigned Stride
= ScalarMemVT
.getStoreSize();
9686 for (unsigned Idx
= 0; Idx
< 4; ++Idx
) {
9687 SDValue Ex
= DAG
.getNode(
9688 ISD::EXTRACT_VECTOR_ELT
, dl
, ScalarVT
, Value
,
9689 DAG
.getConstant(Idx
, dl
, getVectorIdxTy(DAG
.getDataLayout())));
9691 if (ScalarVT
!= ScalarMemVT
)
9693 DAG
.getTruncStore(StoreChain
, dl
, Ex
, BasePtr
,
9694 SN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9695 ScalarMemVT
, MinAlign(Alignment
, Idx
* Stride
),
9696 SN
->getMemOperand()->getFlags(), SN
->getAAInfo());
9698 Store
= DAG
.getStore(StoreChain
, dl
, Ex
, BasePtr
,
9699 SN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9700 MinAlign(Alignment
, Idx
* Stride
),
9701 SN
->getMemOperand()->getFlags(), SN
->getAAInfo());
9703 if (Idx
== 0 && SN
->isIndexed()) {
9704 assert(SN
->getAddressingMode() == ISD::PRE_INC
&&
9705 "Unknown addressing mode on vector store");
9706 Store
= DAG
.getIndexedStore(Store
, dl
, BasePtr
, SN
->getOffset(),
9707 SN
->getAddressingMode());
9710 BasePtr
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
9711 DAG
.getConstant(Stride
, dl
,
9712 BasePtr
.getValueType()));
9713 Stores
[Idx
] = Store
;
9716 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
9718 if (SN
->isIndexed()) {
9719 SDValue RetOps
[] = { TF
, Stores
[0].getValue(1) };
9720 return DAG
.getMergeValues(RetOps
, dl
);
9726 assert(SN
->isUnindexed() && "Indexed v4i1 stores are not supported");
9727 assert(Value
.getValueType() == MVT::v4i1
&& "Unknown store to lower");
9729 // The values are now known to be -1 (false) or 1 (true). To convert this
9730 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9731 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9732 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
9734 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9735 // understand how to form the extending load.
9736 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
9738 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
9740 // Now convert to an integer and store.
9741 Value
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
9742 DAG
.getConstant(Intrinsic::ppc_qpx_qvfctiwu
, dl
, MVT::i32
),
9745 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9746 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9747 MachinePointerInfo PtrInfo
=
9748 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
9749 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9750 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9752 SDValue Ops
[] = {StoreChain
,
9753 DAG
.getConstant(Intrinsic::ppc_qpx_qvstfiw
, dl
, MVT::i32
),
9755 SDVTList VTs
= DAG
.getVTList(/*chain*/ MVT::Other
);
9757 StoreChain
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
,
9758 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
9760 // Move data into the byte array.
9761 SDValue Loads
[4], LoadChains
[4];
9762 for (unsigned i
= 0; i
< 4; ++i
) {
9763 unsigned Offset
= 4*i
;
9764 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
9765 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
9767 Loads
[i
] = DAG
.getLoad(MVT::i32
, dl
, StoreChain
, Idx
,
9768 PtrInfo
.getWithOffset(Offset
));
9769 LoadChains
[i
] = Loads
[i
].getValue(1);
9772 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, LoadChains
);
9775 for (unsigned i
= 0; i
< 4; ++i
) {
9776 SDValue Idx
= DAG
.getConstant(i
, dl
, BasePtr
.getValueType());
9777 Idx
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
, Idx
);
9779 Stores
[i
] = DAG
.getTruncStore(
9780 StoreChain
, dl
, Loads
[i
], Idx
, SN
->getPointerInfo().getWithOffset(i
),
9781 MVT::i8
, /* Alignment = */ 1, SN
->getMemOperand()->getFlags(),
9785 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
9790 SDValue
PPCTargetLowering::LowerMUL(SDValue Op
, SelectionDAG
&DAG
) const {
9792 if (Op
.getValueType() == MVT::v4i32
) {
9793 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9795 SDValue Zero
= BuildSplatI( 0, 1, MVT::v4i32
, DAG
, dl
);
9796 SDValue Neg16
= BuildSplatI(-16, 4, MVT::v4i32
, DAG
, dl
);//+16 as shift amt.
9798 SDValue RHSSwap
= // = vrlw RHS, 16
9799 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw
, RHS
, Neg16
, DAG
, dl
);
9801 // Shrinkify inputs to v8i16.
9802 LHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, LHS
);
9803 RHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, RHS
);
9804 RHSSwap
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, RHSSwap
);
9806 // Low parts multiplied together, generating 32-bit results (we ignore the
9808 SDValue LoProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh
,
9809 LHS
, RHS
, DAG
, dl
, MVT::v4i32
);
9811 SDValue HiProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm
,
9812 LHS
, RHSSwap
, Zero
, DAG
, dl
, MVT::v4i32
);
9813 // Shift the high parts up 16 bits.
9814 HiProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw
, HiProd
,
9816 return DAG
.getNode(ISD::ADD
, dl
, MVT::v4i32
, LoProd
, HiProd
);
9817 } else if (Op
.getValueType() == MVT::v8i16
) {
9818 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9820 SDValue Zero
= BuildSplatI(0, 1, MVT::v8i16
, DAG
, dl
);
9822 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm
,
9823 LHS
, RHS
, Zero
, DAG
, dl
);
9824 } else if (Op
.getValueType() == MVT::v16i8
) {
9825 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9826 bool isLittleEndian
= Subtarget
.isLittleEndian();
9828 // Multiply the even 8-bit parts, producing 16-bit sums.
9829 SDValue EvenParts
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub
,
9830 LHS
, RHS
, DAG
, dl
, MVT::v8i16
);
9831 EvenParts
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, EvenParts
);
9833 // Multiply the odd 8-bit parts, producing 16-bit sums.
9834 SDValue OddParts
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub
,
9835 LHS
, RHS
, DAG
, dl
, MVT::v8i16
);
9836 OddParts
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OddParts
);
9838 // Merge the results together. Because vmuleub and vmuloub are
9839 // instructions with a big-endian bias, we must reverse the
9840 // element numbering and reverse the meaning of "odd" and "even"
9841 // when generating little endian code.
9843 for (unsigned i
= 0; i
!= 8; ++i
) {
9844 if (isLittleEndian
) {
9846 Ops
[i
*2+1] = 2*i
+16;
9849 Ops
[i
*2+1] = 2*i
+1+16;
9853 return DAG
.getVectorShuffle(MVT::v16i8
, dl
, OddParts
, EvenParts
, Ops
);
9855 return DAG
.getVectorShuffle(MVT::v16i8
, dl
, EvenParts
, OddParts
, Ops
);
9857 llvm_unreachable("Unknown mul to lower!");
9861 SDValue
PPCTargetLowering::LowerABS(SDValue Op
, SelectionDAG
&DAG
) const {
9863 assert(Op
.getOpcode() == ISD::ABS
&& "Should only be called for ISD::ABS");
9865 EVT VT
= Op
.getValueType();
9866 assert(VT
.isVector() &&
9867 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9868 assert((VT
== MVT::v2i64
|| VT
== MVT::v4i32
|| VT
== MVT::v8i16
||
9869 VT
== MVT::v16i8
) &&
9870 "Unexpected vector element type!");
9871 assert((VT
!= MVT::v2i64
|| Subtarget
.hasP8Altivec()) &&
9872 "Current subtarget doesn't support smax v2i64!");
9874 // For vector abs, it can be lowered to:
9881 SDValue X
= Op
.getOperand(0);
9882 SDValue Zero
= DAG
.getConstant(0, dl
, VT
);
9883 SDValue Y
= DAG
.getNode(ISD::SUB
, dl
, VT
, Zero
, X
);
9885 // SMAX patch https://reviews.llvm.org/D47332
9886 // hasn't landed yet, so use intrinsic first here.
9887 // TODO: Should use SMAX directly once SMAX patch landed
9888 Intrinsic::ID BifID
= Intrinsic::ppc_altivec_vmaxsw
;
9889 if (VT
== MVT::v2i64
)
9890 BifID
= Intrinsic::ppc_altivec_vmaxsd
;
9891 else if (VT
== MVT::v8i16
)
9892 BifID
= Intrinsic::ppc_altivec_vmaxsh
;
9893 else if (VT
== MVT::v16i8
)
9894 BifID
= Intrinsic::ppc_altivec_vmaxsb
;
9896 return BuildIntrinsicOp(BifID
, X
, Y
, DAG
, dl
, VT
);
9899 // Custom lowering for fpext vf32 to v2f64
9900 SDValue
PPCTargetLowering::LowerFP_EXTEND(SDValue Op
, SelectionDAG
&DAG
) const {
9902 assert(Op
.getOpcode() == ISD::FP_EXTEND
&&
9903 "Should only be called for ISD::FP_EXTEND");
9905 // We only want to custom lower an extend from v2f32 to v2f64.
9906 if (Op
.getValueType() != MVT::v2f64
||
9907 Op
.getOperand(0).getValueType() != MVT::v2f32
)
9911 SDValue Op0
= Op
.getOperand(0);
9913 switch (Op0
.getOpcode()) {
9920 for (unsigned i
= 0, ie
= Op0
.getNumOperands(); i
!= ie
; ++i
) {
9921 // Ensure both input are loads.
9922 SDValue LdOp
= Op0
.getOperand(i
);
9923 if (LdOp
.getOpcode() != ISD::LOAD
)
9925 // Generate new load node.
9926 LoadSDNode
*LD
= cast
<LoadSDNode
>(LdOp
);
9927 SDValue LoadOps
[] = { LD
->getChain(), LD
->getBasePtr() };
9929 DAG
.getMemIntrinsicNode(PPCISD::LD_VSX_LH
, dl
,
9930 DAG
.getVTList(MVT::v4f32
, MVT::Other
),
9931 LoadOps
, LD
->getMemoryVT(),
9932 LD
->getMemOperand());
9934 SDValue NewOp
= DAG
.getNode(Op0
.getOpcode(), SDLoc(Op0
), MVT::v4f32
,
9935 NewLoad
[0], NewLoad
[1],
9936 Op0
.getNode()->getFlags());
9937 return DAG
.getNode(PPCISD::FP_EXTEND_LH
, dl
, MVT::v2f64
, NewOp
);
9940 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op0
);
9941 SDValue LoadOps
[] = { LD
->getChain(), LD
->getBasePtr() };
9943 DAG
.getMemIntrinsicNode(PPCISD::LD_VSX_LH
, dl
,
9944 DAG
.getVTList(MVT::v4f32
, MVT::Other
),
9945 LoadOps
, LD
->getMemoryVT(), LD
->getMemOperand());
9946 return DAG
.getNode(PPCISD::FP_EXTEND_LH
, dl
, MVT::v2f64
, NewLd
);
9949 llvm_unreachable("ERROR:Should return for all cases within swtich.");
9952 /// LowerOperation - Provide custom lowering hooks for some operations.
9954 SDValue
PPCTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const {
9955 switch (Op
.getOpcode()) {
9956 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9957 case ISD::ConstantPool
: return LowerConstantPool(Op
, DAG
);
9958 case ISD::BlockAddress
: return LowerBlockAddress(Op
, DAG
);
9959 case ISD::GlobalAddress
: return LowerGlobalAddress(Op
, DAG
);
9960 case ISD::GlobalTLSAddress
: return LowerGlobalTLSAddress(Op
, DAG
);
9961 case ISD::JumpTable
: return LowerJumpTable(Op
, DAG
);
9962 case ISD::SETCC
: return LowerSETCC(Op
, DAG
);
9963 case ISD::INIT_TRAMPOLINE
: return LowerINIT_TRAMPOLINE(Op
, DAG
);
9964 case ISD::ADJUST_TRAMPOLINE
: return LowerADJUST_TRAMPOLINE(Op
, DAG
);
9966 // Variable argument lowering.
9967 case ISD::VASTART
: return LowerVASTART(Op
, DAG
);
9968 case ISD::VAARG
: return LowerVAARG(Op
, DAG
);
9969 case ISD::VACOPY
: return LowerVACOPY(Op
, DAG
);
9971 case ISD::STACKRESTORE
: return LowerSTACKRESTORE(Op
, DAG
);
9972 case ISD::DYNAMIC_STACKALLOC
: return LowerDYNAMIC_STACKALLOC(Op
, DAG
);
9973 case ISD::GET_DYNAMIC_AREA_OFFSET
:
9974 return LowerGET_DYNAMIC_AREA_OFFSET(Op
, DAG
);
9976 // Exception handling lowering.
9977 case ISD::EH_DWARF_CFA
: return LowerEH_DWARF_CFA(Op
, DAG
);
9978 case ISD::EH_SJLJ_SETJMP
: return lowerEH_SJLJ_SETJMP(Op
, DAG
);
9979 case ISD::EH_SJLJ_LONGJMP
: return lowerEH_SJLJ_LONGJMP(Op
, DAG
);
9981 case ISD::LOAD
: return LowerLOAD(Op
, DAG
);
9982 case ISD::STORE
: return LowerSTORE(Op
, DAG
);
9983 case ISD::TRUNCATE
: return LowerTRUNCATE(Op
, DAG
);
9984 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
9985 case ISD::FP_TO_UINT
:
9986 case ISD::FP_TO_SINT
: return LowerFP_TO_INT(Op
, DAG
, SDLoc(Op
));
9987 case ISD::UINT_TO_FP
:
9988 case ISD::SINT_TO_FP
: return LowerINT_TO_FP(Op
, DAG
);
9989 case ISD::FLT_ROUNDS_
: return LowerFLT_ROUNDS_(Op
, DAG
);
9991 // Lower 64-bit shifts.
9992 case ISD::SHL_PARTS
: return LowerSHL_PARTS(Op
, DAG
);
9993 case ISD::SRL_PARTS
: return LowerSRL_PARTS(Op
, DAG
);
9994 case ISD::SRA_PARTS
: return LowerSRA_PARTS(Op
, DAG
);
9996 // Vector-related lowering.
9997 case ISD::BUILD_VECTOR
: return LowerBUILD_VECTOR(Op
, DAG
);
9998 case ISD::VECTOR_SHUFFLE
: return LowerVECTOR_SHUFFLE(Op
, DAG
);
9999 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
10000 case ISD::SCALAR_TO_VECTOR
: return LowerSCALAR_TO_VECTOR(Op
, DAG
);
10001 case ISD::EXTRACT_VECTOR_ELT
: return LowerEXTRACT_VECTOR_ELT(Op
, DAG
);
10002 case ISD::INSERT_VECTOR_ELT
: return LowerINSERT_VECTOR_ELT(Op
, DAG
);
10003 case ISD::MUL
: return LowerMUL(Op
, DAG
);
10004 case ISD::ABS
: return LowerABS(Op
, DAG
);
10005 case ISD::FP_EXTEND
: return LowerFP_EXTEND(Op
, DAG
);
10007 // For counter-based loop handling.
10008 case ISD::INTRINSIC_W_CHAIN
: return SDValue();
10010 case ISD::BITCAST
: return LowerBITCAST(Op
, DAG
);
10012 // Frame & Return address.
10013 case ISD::RETURNADDR
: return LowerRETURNADDR(Op
, DAG
);
10014 case ISD::FRAMEADDR
: return LowerFRAMEADDR(Op
, DAG
);
10016 case ISD::INTRINSIC_VOID
:
10017 return LowerINTRINSIC_VOID(Op
, DAG
);
10020 return LowerREM(Op
, DAG
);
10022 return LowerBSWAP(Op
, DAG
);
10023 case ISD::ATOMIC_CMP_SWAP
:
10024 return LowerATOMIC_CMP_SWAP(Op
, DAG
);
10028 void PPCTargetLowering::ReplaceNodeResults(SDNode
*N
,
10029 SmallVectorImpl
<SDValue
>&Results
,
10030 SelectionDAG
&DAG
) const {
10032 switch (N
->getOpcode()) {
10034 llvm_unreachable("Do not know how to custom type legalize this operation!");
10035 case ISD::READCYCLECOUNTER
: {
10036 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::i32
, MVT::Other
);
10037 SDValue RTB
= DAG
.getNode(PPCISD::READ_TIME_BASE
, dl
, VTs
, N
->getOperand(0));
10039 Results
.push_back(RTB
);
10040 Results
.push_back(RTB
.getValue(1));
10041 Results
.push_back(RTB
.getValue(2));
10044 case ISD::INTRINSIC_W_CHAIN
: {
10045 if (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue() !=
10046 Intrinsic::loop_decrement
)
10049 assert(N
->getValueType(0) == MVT::i1
&&
10050 "Unexpected result type for CTR decrement intrinsic");
10051 EVT SVT
= getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(),
10052 N
->getValueType(0));
10053 SDVTList VTs
= DAG
.getVTList(SVT
, MVT::Other
);
10054 SDValue NewInt
= DAG
.getNode(N
->getOpcode(), dl
, VTs
, N
->getOperand(0),
10057 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, NewInt
));
10058 Results
.push_back(NewInt
.getValue(1));
10062 if (!Subtarget
.isSVR4ABI() || Subtarget
.isPPC64())
10065 EVT VT
= N
->getValueType(0);
10067 if (VT
== MVT::i64
) {
10068 SDValue NewNode
= LowerVAARG(SDValue(N
, 1), DAG
);
10070 Results
.push_back(NewNode
);
10071 Results
.push_back(NewNode
.getValue(1));
10075 case ISD::FP_TO_SINT
:
10076 case ISD::FP_TO_UINT
:
10077 // LowerFP_TO_INT() can only handle f32 and f64.
10078 if (N
->getOperand(0).getValueType() == MVT::ppcf128
)
10080 Results
.push_back(LowerFP_TO_INT(SDValue(N
, 0), DAG
, dl
));
10082 case ISD::TRUNCATE
: {
10083 EVT TrgVT
= N
->getValueType(0);
10084 EVT OpVT
= N
->getOperand(0).getValueType();
10085 if (TrgVT
.isVector() &&
10086 isOperationCustom(N
->getOpcode(), TrgVT
) &&
10087 OpVT
.getSizeInBits() <= 128 &&
10088 isPowerOf2_32(OpVT
.getVectorElementType().getSizeInBits()))
10089 Results
.push_back(LowerTRUNCATEVector(SDValue(N
, 0), DAG
));
10093 // Don't handle bitcast here.
10098 //===----------------------------------------------------------------------===//
10099 // Other Lowering Code
10100 //===----------------------------------------------------------------------===//
10102 static Instruction
* callIntrinsic(IRBuilder
<> &Builder
, Intrinsic::ID Id
) {
10103 Module
*M
= Builder
.GetInsertBlock()->getParent()->getParent();
10104 Function
*Func
= Intrinsic::getDeclaration(M
, Id
);
10105 return Builder
.CreateCall(Func
, {});
10108 // The mappings for emitLeading/TrailingFence is taken from
10109 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10110 Instruction
*PPCTargetLowering::emitLeadingFence(IRBuilder
<> &Builder
,
10112 AtomicOrdering Ord
) const {
10113 if (Ord
== AtomicOrdering::SequentiallyConsistent
)
10114 return callIntrinsic(Builder
, Intrinsic::ppc_sync
);
10115 if (isReleaseOrStronger(Ord
))
10116 return callIntrinsic(Builder
, Intrinsic::ppc_lwsync
);
10120 Instruction
*PPCTargetLowering::emitTrailingFence(IRBuilder
<> &Builder
,
10122 AtomicOrdering Ord
) const {
10123 if (Inst
->hasAtomicLoad() && isAcquireOrStronger(Ord
)) {
10124 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10125 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10126 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10127 if (isa
<LoadInst
>(Inst
) && Subtarget
.isPPC64())
10128 return Builder
.CreateCall(
10129 Intrinsic::getDeclaration(
10130 Builder
.GetInsertBlock()->getParent()->getParent(),
10131 Intrinsic::ppc_cfence
, {Inst
->getType()}),
10133 // FIXME: Can use isync for rmw operation.
10134 return callIntrinsic(Builder
, Intrinsic::ppc_lwsync
);
10139 MachineBasicBlock
*
10140 PPCTargetLowering::EmitAtomicBinary(MachineInstr
&MI
, MachineBasicBlock
*BB
,
10141 unsigned AtomicSize
,
10142 unsigned BinOpcode
,
10143 unsigned CmpOpcode
,
10144 unsigned CmpPred
) const {
10145 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10146 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10148 auto LoadMnemonic
= PPC::LDARX
;
10149 auto StoreMnemonic
= PPC::STDCX
;
10150 switch (AtomicSize
) {
10152 llvm_unreachable("Unexpected size of atomic entity");
10154 LoadMnemonic
= PPC::LBARX
;
10155 StoreMnemonic
= PPC::STBCX
;
10156 assert(Subtarget
.hasPartwordAtomics() && "Call this only with size >=4");
10159 LoadMnemonic
= PPC::LHARX
;
10160 StoreMnemonic
= PPC::STHCX
;
10161 assert(Subtarget
.hasPartwordAtomics() && "Call this only with size >=4");
10164 LoadMnemonic
= PPC::LWARX
;
10165 StoreMnemonic
= PPC::STWCX
;
10168 LoadMnemonic
= PPC::LDARX
;
10169 StoreMnemonic
= PPC::STDCX
;
10173 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10174 MachineFunction
*F
= BB
->getParent();
10175 MachineFunction::iterator It
= ++BB
->getIterator();
10177 Register dest
= MI
.getOperand(0).getReg();
10178 Register ptrA
= MI
.getOperand(1).getReg();
10179 Register ptrB
= MI
.getOperand(2).getReg();
10180 Register incr
= MI
.getOperand(3).getReg();
10181 DebugLoc dl
= MI
.getDebugLoc();
10183 MachineBasicBlock
*loopMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10184 MachineBasicBlock
*loop2MBB
=
10185 CmpOpcode
? F
->CreateMachineBasicBlock(LLVM_BB
) : nullptr;
10186 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10187 F
->insert(It
, loopMBB
);
10189 F
->insert(It
, loop2MBB
);
10190 F
->insert(It
, exitMBB
);
10191 exitMBB
->splice(exitMBB
->begin(), BB
,
10192 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10193 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10195 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10196 Register TmpReg
= (!BinOpcode
) ? incr
:
10197 RegInfo
.createVirtualRegister( AtomicSize
== 8 ? &PPC::G8RCRegClass
10198 : &PPC::GPRCRegClass
);
10202 // fallthrough --> loopMBB
10203 BB
->addSuccessor(loopMBB
);
10206 // l[wd]arx dest, ptr
10207 // add r0, dest, incr
10208 // st[wd]cx. r0, ptr
10210 // fallthrough --> exitMBB
10214 // l[wd]arx dest, ptr
10215 // cmpl?[wd] incr, dest
10218 // st[wd]cx. dest, ptr
10220 // fallthrough --> exitMBB
10223 BuildMI(BB
, dl
, TII
->get(LoadMnemonic
), dest
)
10224 .addReg(ptrA
).addReg(ptrB
);
10226 BuildMI(BB
, dl
, TII
->get(BinOpcode
), TmpReg
).addReg(incr
).addReg(dest
);
10228 // Signed comparisons of byte or halfword values must be sign-extended.
10229 if (CmpOpcode
== PPC::CMPW
&& AtomicSize
< 4) {
10230 Register ExtReg
= RegInfo
.createVirtualRegister(&PPC::GPRCRegClass
);
10231 BuildMI(BB
, dl
, TII
->get(AtomicSize
== 1 ? PPC::EXTSB
: PPC::EXTSH
),
10232 ExtReg
).addReg(dest
);
10233 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10234 .addReg(incr
).addReg(ExtReg
);
10236 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10237 .addReg(incr
).addReg(dest
);
10239 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10240 .addImm(CmpPred
).addReg(PPC::CR0
).addMBB(exitMBB
);
10241 BB
->addSuccessor(loop2MBB
);
10242 BB
->addSuccessor(exitMBB
);
10245 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
10246 .addReg(TmpReg
).addReg(ptrA
).addReg(ptrB
);
10247 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10248 .addImm(PPC::PRED_NE
).addReg(PPC::CR0
).addMBB(loopMBB
);
10249 BB
->addSuccessor(loopMBB
);
10250 BB
->addSuccessor(exitMBB
);
10258 MachineBasicBlock
*PPCTargetLowering::EmitPartwordAtomicBinary(
10259 MachineInstr
&MI
, MachineBasicBlock
*BB
,
10260 bool is8bit
, // operation
10261 unsigned BinOpcode
, unsigned CmpOpcode
, unsigned CmpPred
) const {
10262 // If we support part-word atomic mnemonics, just use them
10263 if (Subtarget
.hasPartwordAtomics())
10264 return EmitAtomicBinary(MI
, BB
, is8bit
? 1 : 2, BinOpcode
, CmpOpcode
,
10267 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10268 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10269 // In 64 bit mode we have to use 64 bits for addresses, even though the
10270 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
10271 // registers without caring whether they're 32 or 64, but here we're
10272 // doing actual arithmetic on the addresses.
10273 bool is64bit
= Subtarget
.isPPC64();
10274 bool isLittleEndian
= Subtarget
.isLittleEndian();
10275 unsigned ZeroReg
= is64bit
? PPC::ZERO8
: PPC::ZERO
;
10277 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10278 MachineFunction
*F
= BB
->getParent();
10279 MachineFunction::iterator It
= ++BB
->getIterator();
10281 Register dest
= MI
.getOperand(0).getReg();
10282 Register ptrA
= MI
.getOperand(1).getReg();
10283 Register ptrB
= MI
.getOperand(2).getReg();
10284 Register incr
= MI
.getOperand(3).getReg();
10285 DebugLoc dl
= MI
.getDebugLoc();
10287 MachineBasicBlock
*loopMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10288 MachineBasicBlock
*loop2MBB
=
10289 CmpOpcode
? F
->CreateMachineBasicBlock(LLVM_BB
) : nullptr;
10290 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10291 F
->insert(It
, loopMBB
);
10293 F
->insert(It
, loop2MBB
);
10294 F
->insert(It
, exitMBB
);
10295 exitMBB
->splice(exitMBB
->begin(), BB
,
10296 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10297 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10299 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10300 const TargetRegisterClass
*RC
=
10301 is64bit
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
10302 const TargetRegisterClass
*GPRC
= &PPC::GPRCRegClass
;
10304 Register PtrReg
= RegInfo
.createVirtualRegister(RC
);
10305 Register Shift1Reg
= RegInfo
.createVirtualRegister(GPRC
);
10306 Register ShiftReg
=
10307 isLittleEndian
? Shift1Reg
: RegInfo
.createVirtualRegister(GPRC
);
10308 Register Incr2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10309 Register MaskReg
= RegInfo
.createVirtualRegister(GPRC
);
10310 Register Mask2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10311 Register Mask3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10312 Register Tmp2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10313 Register Tmp3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10314 Register Tmp4Reg
= RegInfo
.createVirtualRegister(GPRC
);
10315 Register TmpDestReg
= RegInfo
.createVirtualRegister(GPRC
);
10318 (!BinOpcode
) ? Incr2Reg
: RegInfo
.createVirtualRegister(GPRC
);
10322 // fallthrough --> loopMBB
10323 BB
->addSuccessor(loopMBB
);
10325 // The 4-byte load must be aligned, while a char or short may be
10326 // anywhere in the word. Hence all this nasty bookkeeping code.
10327 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10328 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10329 // xori shift, shift1, 24 [16]
10330 // rlwinm ptr, ptr1, 0, 0, 29
10331 // slw incr2, incr, shift
10332 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10333 // slw mask, mask2, shift
10335 // lwarx tmpDest, ptr
10336 // add tmp, tmpDest, incr2
10337 // andc tmp2, tmpDest, mask
10338 // and tmp3, tmp, mask
10339 // or tmp4, tmp3, tmp2
10340 // stwcx. tmp4, ptr
10342 // fallthrough --> exitMBB
10343 // srw dest, tmpDest, shift
10344 if (ptrA
!= ZeroReg
) {
10345 Ptr1Reg
= RegInfo
.createVirtualRegister(RC
);
10346 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::ADD8
: PPC::ADD4
), Ptr1Reg
)
10352 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
10354 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), Shift1Reg
)
10355 .addReg(Ptr1Reg
, 0, is64bit
? PPC::sub_32
: 0)
10358 .addImm(is8bit
? 28 : 27);
10359 if (!isLittleEndian
)
10360 BuildMI(BB
, dl
, TII
->get(PPC::XORI
), ShiftReg
)
10362 .addImm(is8bit
? 24 : 16);
10364 BuildMI(BB
, dl
, TII
->get(PPC::RLDICR
), PtrReg
)
10369 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), PtrReg
)
10374 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), Incr2Reg
).addReg(incr
).addReg(ShiftReg
);
10376 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask2Reg
).addImm(255);
10378 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask3Reg
).addImm(0);
10379 BuildMI(BB
, dl
, TII
->get(PPC::ORI
), Mask2Reg
)
10383 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), MaskReg
)
10388 BuildMI(BB
, dl
, TII
->get(PPC::LWARX
), TmpDestReg
)
10392 BuildMI(BB
, dl
, TII
->get(BinOpcode
), TmpReg
)
10394 .addReg(TmpDestReg
);
10395 BuildMI(BB
, dl
, TII
->get(PPC::ANDC
), Tmp2Reg
)
10396 .addReg(TmpDestReg
)
10398 BuildMI(BB
, dl
, TII
->get(PPC::AND
), Tmp3Reg
).addReg(TmpReg
).addReg(MaskReg
);
10400 // For unsigned comparisons, we can directly compare the shifted values.
10401 // For signed comparisons we shift and sign extend.
10402 Register SReg
= RegInfo
.createVirtualRegister(GPRC
);
10403 BuildMI(BB
, dl
, TII
->get(PPC::AND
), SReg
)
10404 .addReg(TmpDestReg
)
10406 unsigned ValueReg
= SReg
;
10407 unsigned CmpReg
= Incr2Reg
;
10408 if (CmpOpcode
== PPC::CMPW
) {
10409 ValueReg
= RegInfo
.createVirtualRegister(GPRC
);
10410 BuildMI(BB
, dl
, TII
->get(PPC::SRW
), ValueReg
)
10413 Register ValueSReg
= RegInfo
.createVirtualRegister(GPRC
);
10414 BuildMI(BB
, dl
, TII
->get(is8bit
? PPC::EXTSB
: PPC::EXTSH
), ValueSReg
)
10416 ValueReg
= ValueSReg
;
10419 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10422 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10426 BB
->addSuccessor(loop2MBB
);
10427 BB
->addSuccessor(exitMBB
);
10430 BuildMI(BB
, dl
, TII
->get(PPC::OR
), Tmp4Reg
).addReg(Tmp3Reg
).addReg(Tmp2Reg
);
10431 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
10435 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10436 .addImm(PPC::PRED_NE
)
10439 BB
->addSuccessor(loopMBB
);
10440 BB
->addSuccessor(exitMBB
);
10445 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::SRW
), dest
)
10446 .addReg(TmpDestReg
)
// Custom-inserter expansion of the EH_SjLj_SetJmp32/64 pseudo instruction.
//
// Operands read from MI:
//   operand 0 - destination register (asserted to be legal for i32),
//   operand 1 - register holding the address of the jump buffer.
//
// The block containing MI (thisMBB) is split in three:
//   thisMBB - everything up to MI, plus the stores/branches emitted here;
//   mainMBB - reads LR into LabelReg, stores it at LabelOffset in the buffer
//             and sets mainDstReg to 0;
//   sinkMBB - receives everything that followed MI, and starts with a PHI
//             that defines the destination register as 0 (from mainMBB) or
//             1 (from thisMBB).
// MI itself is erased at the end.
//
// NOTE(review): several operand chains and the final return statement are
// not visible in this copy of the function; comments below that refer to
// them are marked as assumptions.
10451 llvm::MachineBasicBlock
*
10452 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr
&MI
,
10453 MachineBasicBlock
*MBB
) const {
10454 DebugLoc DL
= MI
.getDebugLoc();
10455 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10456 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
10458 MachineFunction
*MF
= MBB
->getParent();
10459 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
// BB is the IR block backing MBB; the new machine blocks are created from
// it. I is the position right after MBB, where the new blocks are inserted.
10461 const BasicBlock
*BB
= MBB
->getBasicBlock();
10462 MachineFunction::iterator I
= ++MBB
->getIterator();
10464 Register DstReg
= MI
.getOperand(0).getReg();
10465 const TargetRegisterClass
*RC
= MRI
.getRegClass(DstReg
);
10466 assert(TRI
->isTypeLegalForClass(*RC
, MVT::i32
) && "Invalid destination!");
// Two temporaries in the destination's register class feed the final PHI:
// mainDstReg is set to 0 in mainMBB, restoreDstReg is set to 1 in thisMBB.
10467 Register mainDstReg
= MRI
.createVirtualRegister(RC
);
10468 Register restoreDstReg
= MRI
.createVirtualRegister(RC
);
10470 MVT PVT
= getPointerTy(MF
->getDataLayout());
10471 assert((PVT
== MVT::i64
|| PVT
== MVT::i32
) &&
10472 "Invalid Pointer Size!");
10473 // For v = setjmp(buf), we generate
10476 // SjLjSetup mainMBB
10482 // buf[LabelOffset] = LR
10486 // v = phi(main, restore)
10489 MachineBasicBlock
*thisMBB
= MBB
;
10490 MachineBasicBlock
*mainMBB
= MF
->CreateMachineBasicBlock(BB
);
10491 MachineBasicBlock
*sinkMBB
= MF
->CreateMachineBasicBlock(BB
);
// Both insertions use the same position I, so the layout order becomes
// thisMBB, mainMBB, sinkMBB.
10492 MF
->insert(I
, mainMBB
);
10493 MF
->insert(I
, sinkMBB
);
10495 MachineInstrBuilder MIB
;
10497 // Transfer the remainder of BB and its successor edges to sinkMBB.
10498 sinkMBB
->splice(sinkMBB
->begin(), MBB
,
10499 std::next(MachineBasicBlock::iterator(MI
)), MBB
->end());
10500 sinkMBB
->transferSuccessorsAndUpdatePHIs(MBB
);
10502 // Note that the structure of the jmp_buf used here is not compatible
10503 // with that used by libc, and is not designed to be. Specifically, it
10504 // stores only those 'reserved' registers that LLVM does not otherwise
10505 // understand how to spill. Also, by convention, by the time this
10506 // intrinsic is called, Clang has already stored the frame address in the
10507 // first slot of the buffer and stack address in the third. Following the
10508 // X86 target code, we'll store the jump address in the second slot. We also
10509 // need to save the TOC pointer (R2) to handle jumps between shared
10510 // libraries, and that will be stored in the fourth slot. The thread
10511 // identifier (R13) is not affected.
// Byte offsets of the buffer slots written here, as multiples of the pointer
// store size: slot 1 (label), slot 3 (TOC) and slot 4 (base pointer).
10514 const int64_t LabelOffset
= 1 * PVT
.getStoreSize();
10515 const int64_t TOCOffset
= 3 * PVT
.getStoreSize();
10516 const int64_t BPOffset
= 4 * PVT
.getStoreSize();
10518 // LabelReg is a pointer-sized virtual register; mainMBB fills it from LR.
10519 const TargetRegisterClass
*PtrRC
= getRegClassFor(PVT
);
10520 Register LabelReg
= MRI
.createVirtualRegister(PtrRC
);
10521 Register BufReg
= MI
.getOperand(1).getReg();
// On the 64-bit ELF ABI, record that this function uses the TOC base pointer
// and emit an STD before MI.
// NOTE(review): the STD operands are not visible here; presumably this saves
// the TOC pointer at TOCOffset in the buffer -- confirm.
10523 if (Subtarget
.is64BitELFABI()) {
10524 setUsesTOCBasePtr(*MBB
->getParent());
10525 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::STD
))
10532 // Naked functions never have a base pointer, and so we use r1. For all
10533 // other functions, this decision must be delayed until during PEI.
10535 if (MF
->getFunction().hasFnAttribute(Attribute::Naked
))
10536 BaseReg
= Subtarget
.isPPC64() ? PPC::X1
: PPC::R1
;
10538 BaseReg
= Subtarget
.isPPC64() ? PPC::BP8
: PPC::BP
;
// Store emitted before MI: STD on PPC64, STW otherwise.
// NOTE(review): operands are not visible here; presumably BaseReg is stored
// at BPOffset in the buffer -- confirm.
10540 MIB
= BuildMI(*thisMBB
, MI
, DL
,
10541 TII
->get(Subtarget
.isPPC64() ? PPC::STD
: PPC::STW
))
// BCLalways targets mainMBB; the no-preserved register mask is attached to it.
10548 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::BCLalways
)).addMBB(mainMBB
);
10549 MIB
.addRegMask(TRI
->getNoPreservedMask());
// restoreDstReg = 1: the value the PHI in sinkMBB takes on the edge coming
// from thisMBB.
10551 BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::LI
), restoreDstReg
).addImm(1);
// EH_SjLj_Setup marker followed by an unconditional branch to sinkMBB.
// NOTE(review): the operands of EH_SjLj_Setup are not visible here.
10553 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::EH_SjLj_Setup
))
10555 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::B
)).addMBB(sinkMBB
);
// CFG edges out of thisMBB: mainMBB gets probability zero, sinkMBB gets
// probability one.
10557 thisMBB
->addSuccessor(mainMBB
, BranchProbability::getZero());
10558 thisMBB
->addSuccessor(sinkMBB
, BranchProbability::getOne());
// mainMBB: copy LR into LabelReg (MFLR8 on PPC64, MFLR otherwise).
10563 BuildMI(mainMBB
, DL
,
10564 TII
->get(Subtarget
.isPPC64() ? PPC::MFLR8
: PPC::MFLR
), LabelReg
);
// Store at LabelOffset using STD on PPC64 and STW otherwise; the memory
// operands of MI are cloned onto whichever store was built.
// NOTE(review): the register operands of these stores are not visible here;
// presumably LabelReg is stored relative to BufReg -- confirm.
10567 if (Subtarget
.isPPC64()) {
10568 MIB
= BuildMI(mainMBB
, DL
, TII
->get(PPC::STD
))
10570 .addImm(LabelOffset
)
10573 MIB
= BuildMI(mainMBB
, DL
, TII
->get(PPC::STW
))
10575 .addImm(LabelOffset
)
10578 MIB
.cloneMemRefs(MI
);
// mainDstReg = 0: the value the PHI takes on the edge coming from mainMBB.
10580 BuildMI(mainMBB
, DL
, TII
->get(PPC::LI
), mainDstReg
).addImm(0);
10581 mainMBB
->addSuccessor(sinkMBB
);
// sinkMBB: a PHI at the top of the block defines DstReg from mainDstReg
// (edge from mainMBB) or restoreDstReg (edge from thisMBB).
10584 BuildMI(*sinkMBB
, sinkMBB
->begin(), DL
,
10585 TII
->get(PPC::PHI
), DstReg
)
10586 .addReg(mainDstReg
).addMBB(mainMBB
)
10587 .addReg(restoreDstReg
).addMBB(thisMBB
);
// The pseudo has been fully expanded; remove it.
10589 MI
.eraseFromParent();
// Custom-inserter expansion of the EH_SjLj_LongJmp32/64 pseudo instruction.
//
// Operand 0 of MI is the register holding the address of the jump buffer.
// All new instructions are inserted into MBB immediately before MI, in this
// order: a load into FP (r31/x31), a load of the label into the scratch
// register Tmp (from LabelOffset), a load into SP (r1/x1), a load into BP,
// and, for 64-bit pointers on the SVR4 ABI, a load into X2. Tmp is then moved
// to CTR and an indirect branch through CTR is emitted. MI is erased at the
// end.
//
// NOTE(review): most operand chains (base register and offsets of the loads),
// the start of the BP definition and the final return statement are not
// visible in this copy of the function; comments below that refer to them are
// marked as assumptions.
10593 MachineBasicBlock
*
10594 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr
&MI
,
10595 MachineBasicBlock
*MBB
) const {
10596 DebugLoc DL
= MI
.getDebugLoc();
10597 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10599 MachineFunction
*MF
= MBB
->getParent();
10600 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
10602 MVT PVT
= getPointerTy(MF
->getDataLayout());
10603 assert((PVT
== MVT::i64
|| PVT
== MVT::i32
) &&
10604 "Invalid Pointer Size!");
// Tmp is a pointer-sized scratch register (G8RC for i64 pointers, GPRC
// otherwise) that receives the label loaded from the buffer.
10606 const TargetRegisterClass
*RC
=
10607 (PVT
== MVT::i64
) ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
10608 Register Tmp
= MRI
.createVirtualRegister(RC
);
10609 // Since FP is only updated here but NOT referenced, it's treated as GPR.
10610 unsigned FP
= (PVT
== MVT::i64
) ? PPC::X31
: PPC::R31
;
10611 unsigned SP
= (PVT
== MVT::i64
) ? PPC::X1
: PPC::R1
;
// NOTE(review): only the tail of the BP initializer is visible below; BP is
// used further down as the destination of the base-pointer load.
10615 : (Subtarget
.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10618 MachineInstrBuilder MIB
;
// Byte offsets of buffer slots 1 to 4, as multiples of the pointer store
// size.
10620 const int64_t LabelOffset
= 1 * PVT
.getStoreSize();
10621 const int64_t SPOffset
= 2 * PVT
.getStoreSize();
10622 const int64_t TOCOffset
= 3 * PVT
.getStoreSize();
10623 const int64_t BPOffset
= 4 * PVT
.getStoreSize();
10625 Register BufReg
= MI
.getOperand(0).getReg();
10627 // Reload FP (the jumped-to function may not have had a
10628 // frame pointer, and if so, then its r31 will be restored as necessary).
// LD for 64-bit pointers, LWZ otherwise; MI's memory operands are cloned onto
// the load. NOTE(review): the address operands are not visible here.
10630 if (PVT
== MVT::i64
) {
10631 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), FP
)
10635 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), FP
)
10639 MIB
.cloneMemRefs(MI
);
// Load the label into Tmp using LabelOffset.
10642 if (PVT
== MVT::i64
) {
10643 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), Tmp
)
10644 .addImm(LabelOffset
)
10647 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), Tmp
)
10648 .addImm(LabelOffset
)
10651 MIB
.cloneMemRefs(MI
);
// Load into SP. NOTE(review): the offset operand is not visible here;
// presumably SPOffset -- confirm.
10654 if (PVT
== MVT::i64
) {
10655 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), SP
)
10659 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), SP
)
10663 MIB
.cloneMemRefs(MI
);
// Load into BP. NOTE(review): the offset operand is not visible here;
// presumably BPOffset -- confirm.
10666 if (PVT
== MVT::i64
) {
10667 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), BP
)
10671 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), BP
)
10675 MIB
.cloneMemRefs(MI
);
// For 64-bit pointers on the SVR4 ABI, record that the function uses the TOC
// base pointer and load into X2. NOTE(review): the offset operand is not
// visible here; presumably TOCOffset -- confirm.
10678 if (PVT
== MVT::i64
&& Subtarget
.isSVR4ABI()) {
10679 setUsesTOCBasePtr(*MBB
->getParent());
10680 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), PPC::X2
)
// Move the loaded label into CTR and branch through it (MTCTR8/BCTR8 for
// 64-bit pointers, MTCTR/BCTR otherwise).
10687 BuildMI(*MBB
, MI
, DL
,
10688 TII
->get(PVT
== MVT::i64
? PPC::MTCTR8
: PPC::MTCTR
)).addReg(Tmp
);
10689 BuildMI(*MBB
, MI
, DL
, TII
->get(PVT
== MVT::i64
? PPC::BCTR8
: PPC::BCTR
));
// The pseudo has been fully expanded; remove it.
10691 MI
.eraseFromParent();
10695 MachineBasicBlock
*
10696 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr
&MI
,
10697 MachineBasicBlock
*BB
) const {
10698 if (MI
.getOpcode() == TargetOpcode::STACKMAP
||
10699 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10700 if (Subtarget
.is64BitELFABI() &&
10701 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10702 // Call lowering should have added an r2 operand to indicate a dependence
10703 // on the TOC base pointer value. It can't however, because there is no
10704 // way to mark the dependence as implicit there, and so the stackmap code
10705 // will confuse it with a regular operand. Instead, add the dependence here.
10707 MI
.addOperand(MachineOperand::CreateReg(PPC::X2
, false, true));
10710 return emitPatchPoint(MI
, BB
);
10713 if (MI
.getOpcode() == PPC::EH_SjLj_SetJmp32
||
10714 MI
.getOpcode() == PPC::EH_SjLj_SetJmp64
) {
10715 return emitEHSjLjSetJmp(MI
, BB
);
10716 } else if (MI
.getOpcode() == PPC::EH_SjLj_LongJmp32
||
10717 MI
.getOpcode() == PPC::EH_SjLj_LongJmp64
) {
10718 return emitEHSjLjLongJmp(MI
, BB
);
10721 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10723 // To "insert" these instructions we actually have to insert their
10724 // control-flow patterns.
10725 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10726 MachineFunction::iterator It
= ++BB
->getIterator();
10728 MachineFunction
*F
= BB
->getParent();
10730 if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10731 MI
.getOpcode() == PPC::SELECT_CC_I8
|| MI
.getOpcode() == PPC::SELECT_I4
||
10732 MI
.getOpcode() == PPC::SELECT_I8
) {
10733 SmallVector
<MachineOperand
, 2> Cond
;
10734 if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10735 MI
.getOpcode() == PPC::SELECT_CC_I8
)
10736 Cond
.push_back(MI
.getOperand(4));
10738 Cond
.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET
));
10739 Cond
.push_back(MI
.getOperand(1));
10741 DebugLoc dl
= MI
.getDebugLoc();
10742 TII
->insertSelect(*BB
, MI
, dl
, MI
.getOperand(0).getReg(), Cond
,
10743 MI
.getOperand(2).getReg(), MI
.getOperand(3).getReg());
10744 } else if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10745 MI
.getOpcode() == PPC::SELECT_CC_I8
||
10746 MI
.getOpcode() == PPC::SELECT_CC_F4
||
10747 MI
.getOpcode() == PPC::SELECT_CC_F8
||
10748 MI
.getOpcode() == PPC::SELECT_CC_F16
||
10749 MI
.getOpcode() == PPC::SELECT_CC_QFRC
||
10750 MI
.getOpcode() == PPC::SELECT_CC_QSRC
||
10751 MI
.getOpcode() == PPC::SELECT_CC_QBRC
||
10752 MI
.getOpcode() == PPC::SELECT_CC_VRRC
||
10753 MI
.getOpcode() == PPC::SELECT_CC_VSFRC
||
10754 MI
.getOpcode() == PPC::SELECT_CC_VSSRC
||
10755 MI
.getOpcode() == PPC::SELECT_CC_VSRC
||
10756 MI
.getOpcode() == PPC::SELECT_CC_SPE4
||
10757 MI
.getOpcode() == PPC::SELECT_CC_SPE
||
10758 MI
.getOpcode() == PPC::SELECT_I4
||
10759 MI
.getOpcode() == PPC::SELECT_I8
||
10760 MI
.getOpcode() == PPC::SELECT_F4
||
10761 MI
.getOpcode() == PPC::SELECT_F8
||
10762 MI
.getOpcode() == PPC::SELECT_F16
||
10763 MI
.getOpcode() == PPC::SELECT_QFRC
||
10764 MI
.getOpcode() == PPC::SELECT_QSRC
||
10765 MI
.getOpcode() == PPC::SELECT_QBRC
||
10766 MI
.getOpcode() == PPC::SELECT_SPE
||
10767 MI
.getOpcode() == PPC::SELECT_SPE4
||
10768 MI
.getOpcode() == PPC::SELECT_VRRC
||
10769 MI
.getOpcode() == PPC::SELECT_VSFRC
||
10770 MI
.getOpcode() == PPC::SELECT_VSSRC
||
10771 MI
.getOpcode() == PPC::SELECT_VSRC
) {
10772 // The incoming instruction knows the destination vreg to set, the
10773 // condition code register to branch on, the true/false values to
10774 // select between, and a branch opcode to use.
10779 // cmpTY ccX, r1, r2
10781 // fallthrough --> copy0MBB
10782 MachineBasicBlock
*thisMBB
= BB
;
10783 MachineBasicBlock
*copy0MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10784 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10785 DebugLoc dl
= MI
.getDebugLoc();
10786 F
->insert(It
, copy0MBB
);
10787 F
->insert(It
, sinkMBB
);
10789 // Transfer the remainder of BB and its successor edges to sinkMBB.
10790 sinkMBB
->splice(sinkMBB
->begin(), BB
,
10791 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10792 sinkMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10794 // Next, add the true and fallthrough blocks as its successors.
10795 BB
->addSuccessor(copy0MBB
);
10796 BB
->addSuccessor(sinkMBB
);
10798 if (MI
.getOpcode() == PPC::SELECT_I4
|| MI
.getOpcode() == PPC::SELECT_I8
||
10799 MI
.getOpcode() == PPC::SELECT_F4
|| MI
.getOpcode() == PPC::SELECT_F8
||
10800 MI
.getOpcode() == PPC::SELECT_F16
||
10801 MI
.getOpcode() == PPC::SELECT_SPE4
||
10802 MI
.getOpcode() == PPC::SELECT_SPE
||
10803 MI
.getOpcode() == PPC::SELECT_QFRC
||
10804 MI
.getOpcode() == PPC::SELECT_QSRC
||
10805 MI
.getOpcode() == PPC::SELECT_QBRC
||
10806 MI
.getOpcode() == PPC::SELECT_VRRC
||
10807 MI
.getOpcode() == PPC::SELECT_VSFRC
||
10808 MI
.getOpcode() == PPC::SELECT_VSSRC
||
10809 MI
.getOpcode() == PPC::SELECT_VSRC
) {
10810 BuildMI(BB
, dl
, TII
->get(PPC::BC
))
10811 .addReg(MI
.getOperand(1).getReg())
10814 unsigned SelectPred
= MI
.getOperand(4).getImm();
10815 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10816 .addImm(SelectPred
)
10817 .addReg(MI
.getOperand(1).getReg())
10822 // %FalseValue = ...
10823 // # fallthrough to sinkMBB
10826 // Update machine-CFG edges
10827 BB
->addSuccessor(sinkMBB
);
10830 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10833 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::PHI
), MI
.getOperand(0).getReg())
10834 .addReg(MI
.getOperand(3).getReg())
10836 .addReg(MI
.getOperand(2).getReg())
10838 } else if (MI
.getOpcode() == PPC::ReadTB
) {
10839 // To read the 64-bit time-base register on a 32-bit target, we read the
10840 // two halves. Should the counter have wrapped while it was being read, we
10841 // need to try again.
10844 // mfspr Rx,TBU # load from TBU
10845 // mfspr Ry,TB # load from TB
10846 // mfspr Rz,TBU # load from TBU
10847 // cmpw crX,Rx,Rz # check if 'old'='new'
10848 // bne readLoop # branch if they're not equal
10851 MachineBasicBlock
*readMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10852 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10853 DebugLoc dl
= MI
.getDebugLoc();
10854 F
->insert(It
, readMBB
);
10855 F
->insert(It
, sinkMBB
);
10857 // Transfer the remainder of BB and its successor edges to sinkMBB.
10858 sinkMBB
->splice(sinkMBB
->begin(), BB
,
10859 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10860 sinkMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10862 BB
->addSuccessor(readMBB
);
10865 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10866 Register ReadAgainReg
= RegInfo
.createVirtualRegister(&PPC::GPRCRegClass
);
10867 Register LoReg
= MI
.getOperand(0).getReg();
10868 Register HiReg
= MI
.getOperand(1).getReg();
10870 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), HiReg
).addImm(269);
10871 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), LoReg
).addImm(268);
10872 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), ReadAgainReg
).addImm(269);
10874 Register CmpReg
= RegInfo
.createVirtualRegister(&PPC::CRRCRegClass
);
10876 BuildMI(BB
, dl
, TII
->get(PPC::CMPW
), CmpReg
)
10878 .addReg(ReadAgainReg
);
10879 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10880 .addImm(PPC::PRED_NE
)
10884 BB
->addSuccessor(readMBB
);
10885 BB
->addSuccessor(sinkMBB
);
10886 } else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8
)
10887 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::ADD4
);
10888 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16
)
10889 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::ADD4
);
10890 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32
)
10891 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::ADD4
);
10892 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64
)
10893 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::ADD8
);
10895 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I8
)
10896 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::AND
);
10897 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I16
)
10898 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::AND
);
10899 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I32
)
10900 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::AND
);
10901 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I64
)
10902 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::AND8
);
10904 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I8
)
10905 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::OR
);
10906 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I16
)
10907 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::OR
);
10908 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I32
)
10909 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::OR
);
10910 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I64
)
10911 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::OR8
);
10913 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8
)
10914 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::XOR
);
10915 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16
)
10916 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::XOR
);
10917 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32
)
10918 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::XOR
);
10919 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64
)
10920 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::XOR8
);
10922 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8
)
10923 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::NAND
);
10924 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16
)
10925 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::NAND
);
10926 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32
)
10927 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::NAND
);
10928 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64
)
10929 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::NAND8
);
10931 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8
)
10932 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::SUBF
);
10933 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16
)
10934 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::SUBF
);
10935 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32
)
10936 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::SUBF
);
10937 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64
)
10938 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::SUBF8
);
10940 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8
)
10941 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPW
, PPC::PRED_GE
);
10942 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16
)
10943 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPW
, PPC::PRED_GE
);
10944 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32
)
10945 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPW
, PPC::PRED_GE
);
10946 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64
)
10947 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPD
, PPC::PRED_GE
);
10949 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8
)
10950 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPW
, PPC::PRED_LE
);
10951 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16
)
10952 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPW
, PPC::PRED_LE
);
10953 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32
)
10954 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPW
, PPC::PRED_LE
);
10955 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64
)
10956 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPD
, PPC::PRED_LE
);
10958 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8
)
10959 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPLW
, PPC::PRED_GE
);
10960 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16
)
10961 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPLW
, PPC::PRED_GE
);
10962 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32
)
10963 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPLW
, PPC::PRED_GE
);
10964 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64
)
10965 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPLD
, PPC::PRED_GE
);
10967 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8
)
10968 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPLW
, PPC::PRED_LE
);
10969 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16
)
10970 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPLW
, PPC::PRED_LE
);
10971 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32
)
10972 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPLW
, PPC::PRED_LE
);
10973 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64
)
10974 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPLD
, PPC::PRED_LE
);
10976 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I8
)
10977 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0);
10978 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I16
)
10979 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0);
10980 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I32
)
10981 BB
= EmitAtomicBinary(MI
, BB
, 4, 0);
10982 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I64
)
10983 BB
= EmitAtomicBinary(MI
, BB
, 8, 0);
10984 else if (MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32
||
10985 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64
||
10986 (Subtarget
.hasPartwordAtomics() &&
10987 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
) ||
10988 (Subtarget
.hasPartwordAtomics() &&
10989 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16
)) {
10990 bool is64bit
= MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64
;
10992 auto LoadMnemonic
= PPC::LDARX
;
10993 auto StoreMnemonic
= PPC::STDCX
;
10994 switch (MI
.getOpcode()) {
10996 llvm_unreachable("Compare and swap of unknown size");
10997 case PPC::ATOMIC_CMP_SWAP_I8
:
10998 LoadMnemonic
= PPC::LBARX
;
10999 StoreMnemonic
= PPC::STBCX
;
11000 assert(Subtarget
.hasPartwordAtomics() && "No support partword atomics.");
11002 case PPC::ATOMIC_CMP_SWAP_I16
:
11003 LoadMnemonic
= PPC::LHARX
;
11004 StoreMnemonic
= PPC::STHCX
;
11005 assert(Subtarget
.hasPartwordAtomics() && "No support partword atomics.");
11007 case PPC::ATOMIC_CMP_SWAP_I32
:
11008 LoadMnemonic
= PPC::LWARX
;
11009 StoreMnemonic
= PPC::STWCX
;
11011 case PPC::ATOMIC_CMP_SWAP_I64
:
11012 LoadMnemonic
= PPC::LDARX
;
11013 StoreMnemonic
= PPC::STDCX
;
11016 Register dest
= MI
.getOperand(0).getReg();
11017 Register ptrA
= MI
.getOperand(1).getReg();
11018 Register ptrB
= MI
.getOperand(2).getReg();
11019 Register oldval
= MI
.getOperand(3).getReg();
11020 Register newval
= MI
.getOperand(4).getReg();
11021 DebugLoc dl
= MI
.getDebugLoc();
11023 MachineBasicBlock
*loop1MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11024 MachineBasicBlock
*loop2MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11025 MachineBasicBlock
*midMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11026 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11027 F
->insert(It
, loop1MBB
);
11028 F
->insert(It
, loop2MBB
);
11029 F
->insert(It
, midMBB
);
11030 F
->insert(It
, exitMBB
);
11031 exitMBB
->splice(exitMBB
->begin(), BB
,
11032 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
11033 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
11037 // fallthrough --> loopMBB
11038 BB
->addSuccessor(loop1MBB
);
11041 // l[bhwd]arx dest, ptr
11042 // cmp[wd] dest, oldval
11045 // st[bhwd]cx. newval, ptr
11049 // st[bhwd]cx. dest, ptr
11052 BuildMI(BB
, dl
, TII
->get(LoadMnemonic
), dest
).addReg(ptrA
).addReg(ptrB
);
11053 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::CMPD
: PPC::CMPW
), PPC::CR0
)
11056 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11057 .addImm(PPC::PRED_NE
)
11060 BB
->addSuccessor(loop2MBB
);
11061 BB
->addSuccessor(midMBB
);
11064 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
11068 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11069 .addImm(PPC::PRED_NE
)
11072 BuildMI(BB
, dl
, TII
->get(PPC::B
)).addMBB(exitMBB
);
11073 BB
->addSuccessor(loop1MBB
);
11074 BB
->addSuccessor(exitMBB
);
11077 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
11081 BB
->addSuccessor(exitMBB
);
11086 } else if (MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
||
11087 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16
) {
11088 // We must use 64-bit registers for addresses when targeting 64-bit,
11089 // since we're actually doing arithmetic on them. Other registers can be 32-bit.
11091 bool is64bit
= Subtarget
.isPPC64();
11092 bool isLittleEndian
= Subtarget
.isLittleEndian();
11093 bool is8bit
= MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
;
11095 Register dest
= MI
.getOperand(0).getReg();
11096 Register ptrA
= MI
.getOperand(1).getReg();
11097 Register ptrB
= MI
.getOperand(2).getReg();
11098 Register oldval
= MI
.getOperand(3).getReg();
11099 Register newval
= MI
.getOperand(4).getReg();
11100 DebugLoc dl
= MI
.getDebugLoc();
11102 MachineBasicBlock
*loop1MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11103 MachineBasicBlock
*loop2MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11104 MachineBasicBlock
*midMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11105 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11106 F
->insert(It
, loop1MBB
);
11107 F
->insert(It
, loop2MBB
);
11108 F
->insert(It
, midMBB
);
11109 F
->insert(It
, exitMBB
);
11110 exitMBB
->splice(exitMBB
->begin(), BB
,
11111 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
11112 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
11114 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11115 const TargetRegisterClass
*RC
=
11116 is64bit
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
11117 const TargetRegisterClass
*GPRC
= &PPC::GPRCRegClass
;
11119 Register PtrReg
= RegInfo
.createVirtualRegister(RC
);
11120 Register Shift1Reg
= RegInfo
.createVirtualRegister(GPRC
);
11121 Register ShiftReg
=
11122 isLittleEndian
? Shift1Reg
: RegInfo
.createVirtualRegister(GPRC
);
11123 Register NewVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11124 Register NewVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11125 Register OldVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11126 Register OldVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11127 Register MaskReg
= RegInfo
.createVirtualRegister(GPRC
);
11128 Register Mask2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11129 Register Mask3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11130 Register Tmp2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11131 Register Tmp4Reg
= RegInfo
.createVirtualRegister(GPRC
);
11132 Register TmpDestReg
= RegInfo
.createVirtualRegister(GPRC
);
11134 Register TmpReg
= RegInfo
.createVirtualRegister(GPRC
);
11135 Register ZeroReg
= is64bit
? PPC::ZERO8
: PPC::ZERO
;
11138 // fallthrough --> loopMBB
11139 BB
->addSuccessor(loop1MBB
);
11141 // The 4-byte load must be aligned, while a char or short may be
11142 // anywhere in the word. Hence all this nasty bookkeeping code.
11143 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11144 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11145 // xori shift, shift1, 24 [16]
11146 // rlwinm ptr, ptr1, 0, 0, 29
11147 // slw newval2, newval, shift
11148 // slw oldval2, oldval,shift
11149 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11150 // slw mask, mask2, shift
11151 // and newval3, newval2, mask
11152 // and oldval3, oldval2, mask
11154 // lwarx tmpDest, ptr
11155 // and tmp, tmpDest, mask
11156 // cmpw tmp, oldval3
11159 // andc tmp2, tmpDest, mask
11160 // or tmp4, tmp2, newval3
11161 // stwcx. tmp4, ptr
11165 // stwcx. tmpDest, ptr
11167 // srw dest, tmpDest, shift
11168 if (ptrA
!= ZeroReg
) {
11169 Ptr1Reg
= RegInfo
.createVirtualRegister(RC
);
11170 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::ADD8
: PPC::ADD4
), Ptr1Reg
)
11177 // We need to use the 32-bit subregister to avoid a register class mismatch in 64-bit mode.
11179 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), Shift1Reg
)
11180 .addReg(Ptr1Reg
, 0, is64bit
? PPC::sub_32
: 0)
11183 .addImm(is8bit
? 28 : 27);
11184 if (!isLittleEndian
)
11185 BuildMI(BB
, dl
, TII
->get(PPC::XORI
), ShiftReg
)
11187 .addImm(is8bit
? 24 : 16);
11189 BuildMI(BB
, dl
, TII
->get(PPC::RLDICR
), PtrReg
)
11194 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), PtrReg
)
11199 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), NewVal2Reg
)
11202 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), OldVal2Reg
)
11206 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask2Reg
).addImm(255);
11208 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask3Reg
).addImm(0);
11209 BuildMI(BB
, dl
, TII
->get(PPC::ORI
), Mask2Reg
)
11213 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), MaskReg
)
11216 BuildMI(BB
, dl
, TII
->get(PPC::AND
), NewVal3Reg
)
11217 .addReg(NewVal2Reg
)
11219 BuildMI(BB
, dl
, TII
->get(PPC::AND
), OldVal3Reg
)
11220 .addReg(OldVal2Reg
)
11224 BuildMI(BB
, dl
, TII
->get(PPC::LWARX
), TmpDestReg
)
11227 BuildMI(BB
, dl
, TII
->get(PPC::AND
), TmpReg
)
11228 .addReg(TmpDestReg
)
11230 BuildMI(BB
, dl
, TII
->get(PPC::CMPW
), PPC::CR0
)
11232 .addReg(OldVal3Reg
);
11233 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11234 .addImm(PPC::PRED_NE
)
11237 BB
->addSuccessor(loop2MBB
);
11238 BB
->addSuccessor(midMBB
);
11241 BuildMI(BB
, dl
, TII
->get(PPC::ANDC
), Tmp2Reg
)
11242 .addReg(TmpDestReg
)
11244 BuildMI(BB
, dl
, TII
->get(PPC::OR
), Tmp4Reg
)
11246 .addReg(NewVal3Reg
);
11247 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
11251 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11252 .addImm(PPC::PRED_NE
)
11255 BuildMI(BB
, dl
, TII
->get(PPC::B
)).addMBB(exitMBB
);
11256 BB
->addSuccessor(loop1MBB
);
11257 BB
->addSuccessor(exitMBB
);
11260 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
11261 .addReg(TmpDestReg
)
11264 BB
->addSuccessor(exitMBB
);
11269 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::SRW
), dest
)
11272 } else if (MI
.getOpcode() == PPC::FADDrtz
) {
11273 // This pseudo performs an FADD with rounding mode temporarily forced
11274 // to round-to-zero. We emit this via custom inserter since the FPSCR
11275 // is not modeled at the SelectionDAG level.
11276 Register Dest
= MI
.getOperand(0).getReg();
11277 Register Src1
= MI
.getOperand(1).getReg();
11278 Register Src2
= MI
.getOperand(2).getReg();
11279 DebugLoc dl
= MI
.getDebugLoc();
11281 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11282 Register MFFSReg
= RegInfo
.createVirtualRegister(&PPC::F8RCRegClass
);
11284 // Save FPSCR value.
11285 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), MFFSReg
);
11287 // Set rounding mode to round-to-zero.
11288 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSB1
)).addImm(31);
11289 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSB0
)).addImm(30);
11291 // Perform addition.
11292 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::FADD
), Dest
).addReg(Src1
).addReg(Src2
);
11294 // Restore FPSCR value.
11295 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSFb
)).addImm(1).addReg(MFFSReg
);
11296 } else if (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT
||
11297 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT
||
11298 MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
||
11299 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT8
) {
11300 unsigned Opcode
= (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
||
11301 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT8
)
11304 bool isEQ
= (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT
||
11305 MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
);
11307 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11308 Register Dest
= RegInfo
.createVirtualRegister(
11309 Opcode
== PPC::ANDIo
? &PPC::GPRCRegClass
: &PPC::G8RCRegClass
);
11311 DebugLoc dl
= MI
.getDebugLoc();
11312 BuildMI(*BB
, MI
, dl
, TII
->get(Opcode
), Dest
)
11313 .addReg(MI
.getOperand(1).getReg())
11315 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::COPY
),
11316 MI
.getOperand(0).getReg())
11317 .addReg(isEQ
? PPC::CR0EQ
: PPC::CR0GT
);
11318 } else if (MI
.getOpcode() == PPC::TCHECK_RET
) {
11319 DebugLoc Dl
= MI
.getDebugLoc();
11320 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11321 Register CRReg
= RegInfo
.createVirtualRegister(&PPC::CRRCRegClass
);
11322 BuildMI(*BB
, MI
, Dl
, TII
->get(PPC::TCHECK
), CRReg
);
11323 BuildMI(*BB
, MI
, Dl
, TII
->get(TargetOpcode::COPY
),
11324 MI
.getOperand(0).getReg())
11326 } else if (MI
.getOpcode() == PPC::TBEGIN_RET
) {
11327 DebugLoc Dl
= MI
.getDebugLoc();
11328 unsigned Imm
= MI
.getOperand(1).getImm();
11329 BuildMI(*BB
, MI
, Dl
, TII
->get(PPC::TBEGIN
)).addImm(Imm
);
11330 BuildMI(*BB
, MI
, Dl
, TII
->get(TargetOpcode::COPY
),
11331 MI
.getOperand(0).getReg())
11332 .addReg(PPC::CR0EQ
);
11333 } else if (MI
.getOpcode() == PPC::SETRNDi
) {
11334 DebugLoc dl
= MI
.getDebugLoc();
11335 Register OldFPSCRReg
= MI
.getOperand(0).getReg();
11337 // Save FPSCR value.
11338 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), OldFPSCRReg
);
11340 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
11341 // the following settings:
11342 // 00 Round to nearest
11344 // 10 Round to +inf
11345 // 11 Round to -inf
11347 // When the operand is immediate, using the two least significant bits of
11348 // the immediate to set the bits 62:63 of FPSCR.
11349 unsigned Mode
= MI
.getOperand(1).getImm();
11350 BuildMI(*BB
, MI
, dl
, TII
->get((Mode
& 1) ? PPC::MTFSB1
: PPC::MTFSB0
))
11353 BuildMI(*BB
, MI
, dl
, TII
->get((Mode
& 2) ? PPC::MTFSB1
: PPC::MTFSB0
))
11355 } else if (MI
.getOpcode() == PPC::SETRND
) {
11356 DebugLoc dl
= MI
.getDebugLoc();
11358 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11359 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11360 // If the target doesn't have DirectMove, we should use stack to do the
11361 // conversion, because the target doesn't have the instructions like mtvsrd
11362 // or mfvsrd to do this conversion directly.
11363 auto copyRegFromG8RCOrF8RC
= [&] (unsigned DestReg
, unsigned SrcReg
) {
11364 if (Subtarget
.hasDirectMove()) {
11365 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::COPY
), DestReg
)
11368 // Use stack to do the register copy.
11369 unsigned StoreOp
= PPC::STD
, LoadOp
= PPC::LFD
;
11370 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11371 const TargetRegisterClass
*RC
= RegInfo
.getRegClass(SrcReg
);
11372 if (RC
== &PPC::F8RCRegClass
) {
11373 // Copy register from F8RCRegClass to G8RCRegclass.
11374 assert((RegInfo
.getRegClass(DestReg
) == &PPC::G8RCRegClass
) &&
11375 "Unsupported RegClass.");
11377 StoreOp
= PPC::STFD
;
11380 // Copy register from G8RCRegClass to F8RCRegclass.
11381 assert((RegInfo
.getRegClass(SrcReg
) == &PPC::G8RCRegClass
) &&
11382 (RegInfo
.getRegClass(DestReg
) == &PPC::F8RCRegClass
) &&
11383 "Unsupported RegClass.");
11386 MachineFrameInfo
&MFI
= F
->getFrameInfo();
11387 int FrameIdx
= MFI
.CreateStackObject(8, 8, false);
11389 MachineMemOperand
*MMOStore
= F
->getMachineMemOperand(
11390 MachinePointerInfo::getFixedStack(*F
, FrameIdx
, 0),
11391 MachineMemOperand::MOStore
, MFI
.getObjectSize(FrameIdx
),
11392 MFI
.getObjectAlignment(FrameIdx
));
11394 // Store the SrcReg into the stack.
11395 BuildMI(*BB
, MI
, dl
, TII
->get(StoreOp
))
11398 .addFrameIndex(FrameIdx
)
11399 .addMemOperand(MMOStore
);
11401 MachineMemOperand
*MMOLoad
= F
->getMachineMemOperand(
11402 MachinePointerInfo::getFixedStack(*F
, FrameIdx
, 0),
11403 MachineMemOperand::MOLoad
, MFI
.getObjectSize(FrameIdx
),
11404 MFI
.getObjectAlignment(FrameIdx
));
11406 // Load from the stack where SrcReg is stored, and save to DestReg,
11407 // so we have done the RegClass conversion from RegClass::SrcReg to
11408 // RegClass::DestReg.
11409 BuildMI(*BB
, MI
, dl
, TII
->get(LoadOp
), DestReg
)
11411 .addFrameIndex(FrameIdx
)
11412 .addMemOperand(MMOLoad
);
11416 Register OldFPSCRReg
= MI
.getOperand(0).getReg();
11418 // Save FPSCR value.
11419 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), OldFPSCRReg
);
11421 // When the operand is gprc register, use two least significant bits of the
11422 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
11424 // copy OldFPSCRTmpReg, OldFPSCRReg
11425 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11426 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11427 // copy NewFPSCRReg, NewFPSCRTmpReg
11428 // mtfsf 255, NewFPSCRReg
11429 MachineOperand SrcOp
= MI
.getOperand(1);
11430 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11431 Register OldFPSCRTmpReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11433 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg
, OldFPSCRReg
);
11435 Register ImDefReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11436 Register ExtSrcReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11438 // The first operand of INSERT_SUBREG should be a register which has
11439 // subregisters, we only care about its RegClass, so we should use an
11440 // IMPLICIT_DEF register.
11441 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::IMPLICIT_DEF
), ImDefReg
);
11442 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::INSERT_SUBREG
), ExtSrcReg
)
11447 Register NewFPSCRTmpReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11448 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::RLDIMI
), NewFPSCRTmpReg
)
11449 .addReg(OldFPSCRTmpReg
)
11454 Register NewFPSCRReg
= RegInfo
.createVirtualRegister(&PPC::F8RCRegClass
);
11455 copyRegFromG8RCOrF8RC(NewFPSCRReg
, NewFPSCRTmpReg
);
11457 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
11459 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSF
))
11461 .addReg(NewFPSCRReg
)
11465 llvm_unreachable("Unexpected instr type to insert");
11468 MI
.eraseFromParent(); // The pseudo instruction is gone now.
11472 //===----------------------------------------------------------------------===//
11473 // Target Optimization Hooks
11474 //===----------------------------------------------------------------------===//
11476 static int getEstimateRefinementSteps(EVT VT
, const PPCSubtarget
&Subtarget
) {
11477 // For the estimates, convergence is quadratic, so we essentially double the
11478 // number of digits correct after every iteration. For both FRE and FRSQRTE,
11479 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11480 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
11481 int RefinementSteps
= Subtarget
.hasRecipPrec() ? 1 : 3;
11482 if (VT
.getScalarType() == MVT::f64
)
11484 return RefinementSteps
;
11487 SDValue
PPCTargetLowering::getSqrtEstimate(SDValue Operand
, SelectionDAG
&DAG
,
11488 int Enabled
, int &RefinementSteps
,
11489 bool &UseOneConstNR
,
11490 bool Reciprocal
) const {
11491 EVT VT
= Operand
.getValueType();
11492 if ((VT
== MVT::f32
&& Subtarget
.hasFRSQRTES()) ||
11493 (VT
== MVT::f64
&& Subtarget
.hasFRSQRTE()) ||
11494 (VT
== MVT::v4f32
&& Subtarget
.hasAltivec()) ||
11495 (VT
== MVT::v2f64
&& Subtarget
.hasVSX()) ||
11496 (VT
== MVT::v4f32
&& Subtarget
.hasQPX()) ||
11497 (VT
== MVT::v4f64
&& Subtarget
.hasQPX())) {
11498 if (RefinementSteps
== ReciprocalEstimate::Unspecified
)
11499 RefinementSteps
= getEstimateRefinementSteps(VT
, Subtarget
);
11501 // The Newton-Raphson computation with a single constant does not provide
11502 // enough accuracy on some CPUs.
11503 UseOneConstNR
= !Subtarget
.needsTwoConstNR();
11504 return DAG
.getNode(PPCISD::FRSQRTE
, SDLoc(Operand
), VT
, Operand
);
11509 SDValue
PPCTargetLowering::getRecipEstimate(SDValue Operand
, SelectionDAG
&DAG
,
11511 int &RefinementSteps
) const {
11512 EVT VT
= Operand
.getValueType();
11513 if ((VT
== MVT::f32
&& Subtarget
.hasFRES()) ||
11514 (VT
== MVT::f64
&& Subtarget
.hasFRE()) ||
11515 (VT
== MVT::v4f32
&& Subtarget
.hasAltivec()) ||
11516 (VT
== MVT::v2f64
&& Subtarget
.hasVSX()) ||
11517 (VT
== MVT::v4f32
&& Subtarget
.hasQPX()) ||
11518 (VT
== MVT::v4f64
&& Subtarget
.hasQPX())) {
11519 if (RefinementSteps
== ReciprocalEstimate::Unspecified
)
11520 RefinementSteps
= getEstimateRefinementSteps(VT
, Subtarget
);
11521 return DAG
.getNode(PPCISD::FRE
, SDLoc(Operand
), VT
, Operand
);
// Selects, per CPU directive, a threshold related to combining FDIVs that
// share a divisor into FMULs by the reciprocal (see the comments in the body).
// NOTE(review): this excerpt is missing several original lines (the embedded
// numbering jumps 11531->11534, 11537->11542 and stops at 11544), so the
// return statements and any case labels preceding DIR_E500 are not visible.
// Going by the comment in the body, embedded cores presumably get 2 and all
// other cores 3 -- confirm against the full source.
11526 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11527 // Note: This functionality is used only when unsafe-fp-math is enabled, and
11528 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11529 // enabled for division), this functionality is redundant with the default
11530 // combiner logic (once the division -> reciprocal/multiply transformation
11531 // has taken place). As a result, this matters more for older cores than for
// newer ones (sentence end presumed; the original continuation line is absent).
11534 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11535 // reciprocal if there are two or more FDIVs (for embedded cores with only
11536 // one FP pipeline) or three or more FDIVs (for generic OOO cores).
// Dispatch on the CPU directive reported by the subtarget.
11537 switch (Subtarget
.getDarwinDirective()) {
// The e500 / e500mc / e5500 directives share one case body.
11542 case PPC::DIR_E500
:
11543 case PPC::DIR_E500mc
:
11544 case PPC::DIR_E5500
:
11549 // isConsecutiveLSLoc needs to work even if all adds have not yet been
11550 // collapsed, and so we need to look through chains of them.
11551 static void getBaseWithConstantOffset(SDValue Loc
, SDValue
&Base
,
11552 int64_t& Offset
, SelectionDAG
&DAG
) {
11553 if (DAG
.isBaseWithConstantOffset(Loc
)) {
11554 Base
= Loc
.getOperand(0);
11555 Offset
+= cast
<ConstantSDNode
>(Loc
.getOperand(1))->getSExtValue();
11557 // The base might itself be a base plus an offset, and if so, accumulate
11559 getBaseWithConstantOffset(Loc
.getOperand(0), Base
, Offset
, DAG
);
11563 static bool isConsecutiveLSLoc(SDValue Loc
, EVT VT
, LSBaseSDNode
*Base
,
11564 unsigned Bytes
, int Dist
,
11565 SelectionDAG
&DAG
) {
11566 if (VT
.getSizeInBits() / 8 != Bytes
)
11569 SDValue BaseLoc
= Base
->getBasePtr();
11570 if (Loc
.getOpcode() == ISD::FrameIndex
) {
11571 if (BaseLoc
.getOpcode() != ISD::FrameIndex
)
11573 const MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
11574 int FI
= cast
<FrameIndexSDNode
>(Loc
)->getIndex();
11575 int BFI
= cast
<FrameIndexSDNode
>(BaseLoc
)->getIndex();
11576 int FS
= MFI
.getObjectSize(FI
);
11577 int BFS
= MFI
.getObjectSize(BFI
);
11578 if (FS
!= BFS
|| FS
!= (int)Bytes
) return false;
11579 return MFI
.getObjectOffset(FI
) == (MFI
.getObjectOffset(BFI
) + Dist
*Bytes
);
11582 SDValue Base1
= Loc
, Base2
= BaseLoc
;
11583 int64_t Offset1
= 0, Offset2
= 0;
11584 getBaseWithConstantOffset(Loc
, Base1
, Offset1
, DAG
);
11585 getBaseWithConstantOffset(BaseLoc
, Base2
, Offset2
, DAG
);
11586 if (Base1
== Base2
&& Offset1
== (Offset2
+ Dist
* Bytes
))
11589 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
11590 const GlobalValue
*GV1
= nullptr;
11591 const GlobalValue
*GV2
= nullptr;
11594 bool isGA1
= TLI
.isGAPlusOffset(Loc
.getNode(), GV1
, Offset1
);
11595 bool isGA2
= TLI
.isGAPlusOffset(BaseLoc
.getNode(), GV2
, Offset2
);
11596 if (isGA1
&& isGA2
&& GV1
== GV2
)
11597 return Offset1
== (Offset2
+ Dist
*Bytes
);
11601 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11602 // not enforce equality of the chain operands.
//
// N is the candidate memory operation and Base the reference load/store;
// Bytes and Dist are forwarded unchanged to isConsecutiveLSLoc, which makes
// the actual address comparison.
//
// NOTE(review): this excerpt omits some original lines (the embedded line
// numbers skip two lines after every group of case labels, plus closing
// braces). The statements that pick the memory type VT for each intrinsic
// group are therefore not visible here -- consult the full source for them.
11603 static bool isConsecutiveLS(SDNode
*N
, LSBaseSDNode
*Base
,
11604 unsigned Bytes
, int Dist
,
11605 SelectionDAG
&DAG
) {
// Ordinary loads/stores: take the memory type and base pointer from the node.
11606 if (LSBaseSDNode
*LS
= dyn_cast
<LSBaseSDNode
>(N
)) {
11607 EVT VT
= LS
->getMemoryVT();
11608 SDValue Loc
= LS
->getBasePtr();
11609 return isConsecutiveLSLoc(Loc
, VT
, Base
, Bytes
, Dist
, DAG
);
// Intrinsics with a chain: operand 1 is read as the intrinsic ID; any ID not
// listed below yields false. For listed IDs operand 2 is passed as the
// address.
11612 if (N
->getOpcode() == ISD::INTRINSIC_W_CHAIN
) {
11614 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
11615 default: return false;
11616 case Intrinsic::ppc_qpx_qvlfd
:
11617 case Intrinsic::ppc_qpx_qvlfda
:
11620 case Intrinsic::ppc_qpx_qvlfs
:
11621 case Intrinsic::ppc_qpx_qvlfsa
:
11624 case Intrinsic::ppc_qpx_qvlfcd
:
11625 case Intrinsic::ppc_qpx_qvlfcda
:
11628 case Intrinsic::ppc_qpx_qvlfcs
:
11629 case Intrinsic::ppc_qpx_qvlfcsa
:
11632 case Intrinsic::ppc_qpx_qvlfiwa
:
11633 case Intrinsic::ppc_qpx_qvlfiwz
:
11634 case Intrinsic::ppc_altivec_lvx
:
11635 case Intrinsic::ppc_altivec_lvxl
:
11636 case Intrinsic::ppc_vsx_lxvw4x
:
11637 case Intrinsic::ppc_vsx_lxvw4x_be
:
11640 case Intrinsic::ppc_vsx_lxvd2x
:
11641 case Intrinsic::ppc_vsx_lxvd2x_be
:
11644 case Intrinsic::ppc_altivec_lvebx
:
11647 case Intrinsic::ppc_altivec_lvehx
:
11650 case Intrinsic::ppc_altivec_lvewx
:
11655 return isConsecutiveLSLoc(N
->getOperand(2), VT
, Base
, Bytes
, Dist
, DAG
);
// Void intrinsics: same scheme as above, but operand 3 is passed as the
// address.
11658 if (N
->getOpcode() == ISD::INTRINSIC_VOID
) {
11660 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
11661 default: return false;
11662 case Intrinsic::ppc_qpx_qvstfd
:
11663 case Intrinsic::ppc_qpx_qvstfda
:
11666 case Intrinsic::ppc_qpx_qvstfs
:
11667 case Intrinsic::ppc_qpx_qvstfsa
:
11670 case Intrinsic::ppc_qpx_qvstfcd
:
11671 case Intrinsic::ppc_qpx_qvstfcda
:
11674 case Intrinsic::ppc_qpx_qvstfcs
:
11675 case Intrinsic::ppc_qpx_qvstfcsa
:
11678 case Intrinsic::ppc_qpx_qvstfiw
:
11679 case Intrinsic::ppc_qpx_qvstfiwa
:
11680 case Intrinsic::ppc_altivec_stvx
:
11681 case Intrinsic::ppc_altivec_stvxl
:
11682 case Intrinsic::ppc_vsx_stxvw4x
:
11685 case Intrinsic::ppc_vsx_stxvd2x
:
11688 case Intrinsic::ppc_vsx_stxvw4x_be
:
11691 case Intrinsic::ppc_vsx_stxvd2x_be
:
11694 case Intrinsic::ppc_altivec_stvebx
:
11697 case Intrinsic::ppc_altivec_stvehx
:
11700 case Intrinsic::ppc_altivec_stvewx
:
11705 return isConsecutiveLSLoc(N
->getOperand(3), VT
, Base
, Bytes
, Dist
, DAG
);
11711 // Return true is there is a nearyby consecutive load to the one provided
11712 // (regardless of alignment). We search up and down the chain, looking though
11713 // token factors and other loads (but nothing else). As a result, a true result
11714 // indicates that it is safe to create a new consecutive load adjacent to the
11716 static bool findConsecutiveLoad(LoadSDNode
*LD
, SelectionDAG
&DAG
) {
11717 SDValue Chain
= LD
->getChain();
11718 EVT VT
= LD
->getMemoryVT();
11720 SmallSet
<SDNode
*, 16> LoadRoots
;
11721 SmallVector
<SDNode
*, 8> Queue(1, Chain
.getNode());
11722 SmallSet
<SDNode
*, 16> Visited
;
11724 // First, search up the chain, branching to follow all token-factor operands.
11725 // If we find a consecutive load, then we're done, otherwise, record all
11726 // nodes just above the top-level loads and token factors.
11727 while (!Queue
.empty()) {
11728 SDNode
*ChainNext
= Queue
.pop_back_val();
11729 if (!Visited
.insert(ChainNext
).second
)
11732 if (MemSDNode
*ChainLD
= dyn_cast
<MemSDNode
>(ChainNext
)) {
11733 if (isConsecutiveLS(ChainLD
, LD
, VT
.getStoreSize(), 1, DAG
))
11736 if (!Visited
.count(ChainLD
->getChain().getNode()))
11737 Queue
.push_back(ChainLD
->getChain().getNode());
11738 } else if (ChainNext
->getOpcode() == ISD::TokenFactor
) {
11739 for (const SDUse
&O
: ChainNext
->ops())
11740 if (!Visited
.count(O
.getNode()))
11741 Queue
.push_back(O
.getNode());
11743 LoadRoots
.insert(ChainNext
);
11746 // Second, search down the chain, starting from the top-level nodes recorded
11747 // in the first phase. These top-level nodes are the nodes just above all
11748 // loads and token factors. Starting with their uses, recursively look though
11749 // all loads (just the chain uses) and token factors to find a consecutive
11754 for (SmallSet
<SDNode
*, 16>::iterator I
= LoadRoots
.begin(),
11755 IE
= LoadRoots
.end(); I
!= IE
; ++I
) {
11756 Queue
.push_back(*I
);
11758 while (!Queue
.empty()) {
11759 SDNode
*LoadRoot
= Queue
.pop_back_val();
11760 if (!Visited
.insert(LoadRoot
).second
)
11763 if (MemSDNode
*ChainLD
= dyn_cast
<MemSDNode
>(LoadRoot
))
11764 if (isConsecutiveLS(ChainLD
, LD
, VT
.getStoreSize(), 1, DAG
))
11767 for (SDNode::use_iterator UI
= LoadRoot
->use_begin(),
11768 UE
= LoadRoot
->use_end(); UI
!= UE
; ++UI
)
11769 if (((isa
<MemSDNode
>(*UI
) &&
11770 cast
<MemSDNode
>(*UI
)->getChain().getNode() == LoadRoot
) ||
11771 UI
->getOpcode() == ISD::TokenFactor
) && !Visited
.count(*UI
))
11772 Queue
.push_back(*UI
);
11779 /// This function is called when we have proved that a SETCC node can be replaced
11780 /// by subtraction (and other supporting instructions) so that the result of
11781 /// comparison is kept in a GPR instead of CR. This function is purely for
11782 /// codegen purposes and has some flags to guide the codegen process.
11783 static SDValue
generateEquivalentSub(SDNode
*N
, int Size
, bool Complement
,
11784 bool Swap
, SDLoc
&DL
, SelectionDAG
&DAG
) {
11785 assert(N
->getOpcode() == ISD::SETCC
&& "ISD::SETCC Expected.");
11787 // Zero extend the operands to the largest legal integer. Originally, they
11788 // must be of a strictly smaller size.
11789 auto Op0
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N
->getOperand(0),
11790 DAG
.getConstant(Size
, DL
, MVT::i32
));
11791 auto Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N
->getOperand(1),
11792 DAG
.getConstant(Size
, DL
, MVT::i32
));
11794 // Swap if needed. Depends on the condition code.
11796 std::swap(Op0
, Op1
);
11798 // Subtract extended integers.
11799 auto SubNode
= DAG
.getNode(ISD::SUB
, DL
, MVT::i64
, Op0
, Op1
);
11801 // Move the sign bit to the least significant position and zero out the rest.
11802 // Now the least significant bit carries the result of original comparison.
11803 auto Shifted
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, SubNode
,
11804 DAG
.getConstant(Size
- 1, DL
, MVT::i32
));
11805 auto Final
= Shifted
;
11807 // Complement the result if needed. Based on the condition code.
11809 Final
= DAG
.getNode(ISD::XOR
, DL
, MVT::i64
, Shifted
,
11810 DAG
.getConstant(1, DL
, MVT::i64
));
11812 return DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i1
, Final
);
11815 SDValue
PPCTargetLowering::ConvertSETCCToSubtract(SDNode
*N
,
11816 DAGCombinerInfo
&DCI
) const {
11817 assert(N
->getOpcode() == ISD::SETCC
&& "ISD::SETCC Expected.");
11819 SelectionDAG
&DAG
= DCI
.DAG
;
11822 // Size of integers being compared has a critical role in the following
11823 // analysis, so we prefer to do this when all types are legal.
11824 if (!DCI
.isAfterLegalizeDAG())
11827 // If all users of SETCC extend its value to a legal integer type
11828 // then we replace SETCC with a subtraction
11829 for (SDNode::use_iterator UI
= N
->use_begin(),
11830 UE
= N
->use_end(); UI
!= UE
; ++UI
) {
11831 if (UI
->getOpcode() != ISD::ZERO_EXTEND
)
11835 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
11836 auto OpSize
= N
->getOperand(0).getValueSizeInBits();
11838 unsigned Size
= DAG
.getDataLayout().getLargestLegalIntTypeSizeInBits();
11840 if (OpSize
< Size
) {
11844 return generateEquivalentSub(N
, Size
, false, false, DL
, DAG
);
11846 return generateEquivalentSub(N
, Size
, true, true, DL
, DAG
);
11848 return generateEquivalentSub(N
, Size
, false, true, DL
, DAG
);
11850 return generateEquivalentSub(N
, Size
, true, false, DL
, DAG
);
// DAG combine for a node N that is either a TRUNCATE to i1 or a
// SETCC / SELECT_CC, whose i32/i64 operand(s) are trees of AND / OR / XOR /
// SELECT / SELECT_CC / TRUNCATE / extension nodes fed only by extensions from
// i1 or by constants. When the whole tree is self-contained, the extension
// inputs are replaced by their i1 operands and every intermediate operation
// is rebuilt with an i1 result type.
//
// NOTE(review): this excerpt omits some original lines (the embedded line
// numbers jump wherever a brace-only line or a short statement was dropped).
// The statements executed by the bail-out checks and the loop-control
// statements after several `if` conditions are therefore not visible here;
// consult the full source before relying on the exact control flow.
11857 SDValue
PPCTargetLowering::DAGCombineTruncBoolExt(SDNode
*N
,
11858 DAGCombinerInfo
&DCI
) const {
11859 SelectionDAG
&DAG
= DCI
.DAG
;
11862 assert(Subtarget
.useCRBits() && "Expecting to be tracking CR bits");
11863 // If we're tracking CR bits, we need to be careful that we don't have:
11864 // trunc(binary-ops(zext(x), zext(y)))
11866 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11867 // such that we're unnecessarily moving things into GPRs when it would be
11868 // better to keep them in CR bits.
11870 // Note that trunc here can be an actual i1 trunc, or can be the effective
11871 // truncation that comes from a setcc or select_cc.
// Precondition: a TRUNCATE is only of interest when it produces i1.
11872 if (N
->getOpcode() == ISD::TRUNCATE
&&
11873 N
->getValueType(0) != MVT::i1
)
// Precondition: the first operand must be i32 or i64.
11876 if (N
->getOperand(0).getValueType() != MVT::i32
&&
11877 N
->getOperand(0).getValueType() != MVT::i64
)
11880 if (N
->getOpcode() == ISD::SETCC
||
11881 N
->getOpcode() == ISD::SELECT_CC
) {
11882 // If we're looking at a comparison, then we need to make sure that the
11883 // high bits (all except for the first) don't matter the result.
// The condition code is operand 2 of a SETCC and operand 4 of a SELECT_CC.
11885 cast
<CondCodeSDNode
>(N
->getOperand(
11886 N
->getOpcode() == ISD::SETCC
? 2 : 4))->get();
11887 unsigned OpBits
= N
->getOperand(0).getValueSizeInBits();
// Signed predicates: both operands must consist entirely of sign bits.
11889 if (ISD::isSignedIntSetCC(CC
)) {
11890 if (DAG
.ComputeNumSignBits(N
->getOperand(0)) != OpBits
||
11891 DAG
.ComputeNumSignBits(N
->getOperand(1)) != OpBits
)
// Unsigned predicates: all bits above bit 0 must be known zero in both
// operands; otherwise a SETCC is handed to ConvertSETCCToSubtract instead.
11893 } else if (ISD::isUnsignedIntSetCC(CC
)) {
11894 if (!DAG
.MaskedValueIsZero(N
->getOperand(0),
11895 APInt::getHighBitsSet(OpBits
, OpBits
-1)) ||
11896 !DAG
.MaskedValueIsZero(N
->getOperand(1),
11897 APInt::getHighBitsSet(OpBits
, OpBits
-1)))
11898 return (N
->getOpcode() == ISD::SETCC
? ConvertSETCCToSubtract(N
, DCI
)
11901 // This is neither a signed nor an unsigned comparison, just make sure
11902 // that the high bits are equal.
11903 KnownBits Op1Known
= DAG
.computeKnownBits(N
->getOperand(0));
11904 KnownBits Op2Known
= DAG
.computeKnownBits(N
->getOperand(1));
11906 // We don't really care about what is known about the first bit (if
11907 // anything), so clear it in all masks prior to comparing them.
11908 Op1Known
.Zero
.clearBit(0); Op1Known
.One
.clearBit(0);
11909 Op2Known
.Zero
.clearBit(0); Op2Known
.One
.clearBit(0);
11911 if (Op1Known
.Zero
!= Op2Known
.Zero
|| Op1Known
.One
!= Op2Known
.One
)
11916 // We now know that the higher-order bits are irrelevant, we just need to
11917 // make sure that all of the intermediate operations are bit operations, and
11918 // all inputs are extensions.
11919 if (N
->getOperand(0).getOpcode() != ISD::AND
&&
11920 N
->getOperand(0).getOpcode() != ISD::OR
&&
11921 N
->getOperand(0).getOpcode() != ISD::XOR
&&
11922 N
->getOperand(0).getOpcode() != ISD::SELECT
&&
11923 N
->getOperand(0).getOpcode() != ISD::SELECT_CC
&&
11924 N
->getOperand(0).getOpcode() != ISD::TRUNCATE
&&
11925 N
->getOperand(0).getOpcode() != ISD::SIGN_EXTEND
&&
11926 N
->getOperand(0).getOpcode() != ISD::ZERO_EXTEND
&&
11927 N
->getOperand(0).getOpcode() != ISD::ANY_EXTEND
)
// For comparisons the second operand must pass the same opcode filter.
11930 if ((N
->getOpcode() == ISD::SETCC
|| N
->getOpcode() == ISD::SELECT_CC
) &&
11931 N
->getOperand(1).getOpcode() != ISD::AND
&&
11932 N
->getOperand(1).getOpcode() != ISD::OR
&&
11933 N
->getOperand(1).getOpcode() != ISD::XOR
&&
11934 N
->getOperand(1).getOpcode() != ISD::SELECT
&&
11935 N
->getOperand(1).getOpcode() != ISD::SELECT_CC
&&
11936 N
->getOperand(1).getOpcode() != ISD::TRUNCATE
&&
11937 N
->getOperand(1).getOpcode() != ISD::SIGN_EXTEND
&&
11938 N
->getOperand(1).getOpcode() != ISD::ZERO_EXTEND
&&
11939 N
->getOperand(1).getOpcode() != ISD::ANY_EXTEND
)
// Inputs:  leaves of the cluster (extensions from i1, or constants).
// BinOps:  worklist of operations still to be examined.
// PromOps: operations that will be re-created with an i1 result.
// Visited: every operation node already examined.
11942 SmallVector
<SDValue
, 4> Inputs
;
11943 SmallVector
<SDValue
, 8> BinOps
, PromOps
;
11944 SmallPtrSet
<SDNode
*, 16> Visited
;
// Classify the first two operands of N as either a leaf input or an
// operation to explore. (For a TRUNCATE, presumably only operand 0 is
// considered -- the statement following the TRUNCATE check is not visible.)
11946 for (unsigned i
= 0; i
< 2; ++i
) {
11947 if (((N
->getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11948 N
->getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11949 N
->getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) &&
11950 N
->getOperand(i
).getOperand(0).getValueType() == MVT::i1
) ||
11951 isa
<ConstantSDNode
>(N
->getOperand(i
)))
11952 Inputs
.push_back(N
->getOperand(i
));
11954 BinOps
.push_back(N
->getOperand(i
));
11956 if (N
->getOpcode() == ISD::TRUNCATE
)
11960 // Visit all inputs, collect all binary operations (and, or, xor and
11961 // select) that are all fed by extensions.
11962 while (!BinOps
.empty()) {
11963 SDValue BinOp
= BinOps
.back();
11966 if (!Visited
.insert(BinOp
.getNode()).second
)
11969 PromOps
.push_back(BinOp
);
11971 for (unsigned i
= 0, ie
= BinOp
.getNumOperands(); i
!= ie
; ++i
) {
11972 // The condition of the select is not promoted.
11973 if (BinOp
.getOpcode() == ISD::SELECT
&& i
== 0)
// For SELECT_CC only operands 2 and 3 are examined here.
11975 if (BinOp
.getOpcode() == ISD::SELECT_CC
&& i
!= 2 && i
!= 3)
11978 if (((BinOp
.getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11979 BinOp
.getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11980 BinOp
.getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) &&
11981 BinOp
.getOperand(i
).getOperand(0).getValueType() == MVT::i1
) ||
11982 isa
<ConstantSDNode
>(BinOp
.getOperand(i
))) {
11983 Inputs
.push_back(BinOp
.getOperand(i
));
11984 } else if (BinOp
.getOperand(i
).getOpcode() == ISD::AND
||
11985 BinOp
.getOperand(i
).getOpcode() == ISD::OR
||
11986 BinOp
.getOperand(i
).getOpcode() == ISD::XOR
||
11987 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT
||
11988 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT_CC
||
11989 BinOp
.getOperand(i
).getOpcode() == ISD::TRUNCATE
||
11990 BinOp
.getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11991 BinOp
.getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11992 BinOp
.getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) {
11993 BinOps
.push_back(BinOp
.getOperand(i
));
11995 // We have an input that is not an extension or another binary
11996 // operation; we'll abort this transformation.
12002 // Make sure that this is a self-contained cluster of operations (which
12003 // is not quite the same thing as saying that everything has only one
// First pass: check the users of every non-constant input.
12005 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12006 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12009 for (SDNode::use_iterator UI
= Inputs
[i
].getNode()->use_begin(),
12010 UE
= Inputs
[i
].getNode()->use_end();
12012 SDNode
*User
= *UI
;
// A user that is neither N nor a visited operation lies outside the cluster.
12013 if (User
!= N
&& !Visited
.count(User
))
12016 // Make sure that we're not going to promote the non-output-value
12017 // operand(s) or SELECT or SELECT_CC.
12018 // FIXME: Although we could sometimes handle this, and it does occur in
12019 // practice that one of the condition inputs to the select is also one of
12020 // the outputs, we currently can't deal with this.
12021 if (User
->getOpcode() == ISD::SELECT
) {
12022 if (User
->getOperand(0) == Inputs
[i
])
12024 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12025 if (User
->getOperand(0) == Inputs
[i
] ||
12026 User
->getOperand(1) == Inputs
[i
])
// Second pass: the same checks for the users of every to-be-promoted
// operation.
12032 for (unsigned i
= 0, ie
= PromOps
.size(); i
!= ie
; ++i
) {
12033 for (SDNode::use_iterator UI
= PromOps
[i
].getNode()->use_begin(),
12034 UE
= PromOps
[i
].getNode()->use_end();
12036 SDNode
*User
= *UI
;
12037 if (User
!= N
&& !Visited
.count(User
))
12040 // Make sure that we're not going to promote the non-output-value
12041 // operand(s) or SELECT or SELECT_CC.
12042 // FIXME: Although we could sometimes handle this, and it does occur in
12043 // practice that one of the condition inputs to the select is also one of
12044 // the outputs, we currently can't deal with this.
12045 if (User
->getOpcode() == ISD::SELECT
) {
12046 if (User
->getOperand(0) == PromOps
[i
])
12048 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12049 if (User
->getOperand(0) == PromOps
[i
] ||
12050 User
->getOperand(1) == PromOps
[i
])
12056 // Replace all inputs with the extension operand.
12057 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12058 // Constants may have users outside the cluster of to-be-promoted nodes,
12059 // and so we need to replace those as we do the promotions.
12060 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12063 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
], Inputs
[i
].getOperand(0));
// Hold each to-be-promoted value through a HandleSDNode -- presumably so the
// value stays tracked across the ReplaceAllUsesOfValueWith calls below;
// confirm against the HandleSDNode documentation.
12066 std::list
<HandleSDNode
> PromOpHandles
;
12067 for (auto &PromOp
: PromOps
)
12068 PromOpHandles
.emplace_back(PromOp
);
12070 // Replace all operations (these are all the same, but have a different
12071 // (i1) return type). DAG.getNode will validate that the types of
12072 // a binary operator match, so go through the list in reverse so that
12073 // we've likely promoted both operands first. Any intermediate truncations or
12074 // extensions disappear.
12075 while (!PromOpHandles
.empty()) {
12076 SDValue PromOp
= PromOpHandles
.back().getValue();
12077 PromOpHandles
.pop_back();
// Truncations and extensions inside the cluster are replaced by their
// operand, with a constant operand truncated to i1 first.
12079 if (PromOp
.getOpcode() == ISD::TRUNCATE
||
12080 PromOp
.getOpcode() == ISD::SIGN_EXTEND
||
12081 PromOp
.getOpcode() == ISD::ZERO_EXTEND
||
12082 PromOp
.getOpcode() == ISD::ANY_EXTEND
) {
12083 if (!isa
<ConstantSDNode
>(PromOp
.getOperand(0)) &&
12084 PromOp
.getOperand(0).getValueType() != MVT::i1
) {
12085 // The operand is not yet ready (see comment below).
12086 PromOpHandles
.emplace_front(PromOp
);
12090 SDValue RepValue
= PromOp
.getOperand(0);
12091 if (isa
<ConstantSDNode
>(RepValue
))
12092 RepValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, RepValue
);
12094 DAG
.ReplaceAllUsesOfValueWith(PromOp
, RepValue
);
// C is the index of the first of the two value operands to promote: 0 for
// the plain binary operations, 1 for SELECT and 2 for SELECT_CC.
12099 switch (PromOp
.getOpcode()) {
12100 default: C
= 0; break;
12101 case ISD::SELECT
: C
= 1; break;
12102 case ISD::SELECT_CC
: C
= 2; break;
12105 if ((!isa
<ConstantSDNode
>(PromOp
.getOperand(C
)) &&
12106 PromOp
.getOperand(C
).getValueType() != MVT::i1
) ||
12107 (!isa
<ConstantSDNode
>(PromOp
.getOperand(C
+1)) &&
12108 PromOp
.getOperand(C
+1).getValueType() != MVT::i1
)) {
12109 // The to-be-promoted operands of this node have not yet been
12110 // promoted (this should be rare because we're going through the
12111 // list backward, but if one of the operands has several users in
12112 // this cluster of to-be-promoted nodes, it is possible).
12113 PromOpHandles
.emplace_front(PromOp
);
12117 SmallVector
<SDValue
, 3> Ops(PromOp
.getNode()->op_begin(),
12118 PromOp
.getNode()->op_end());
12120 // If there are any constant inputs, make sure they're replaced now.
12121 for (unsigned i
= 0; i
< 2; ++i
)
12122 if (isa
<ConstantSDNode
>(Ops
[C
+i
]))
12123 Ops
[C
+i
] = DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, Ops
[C
+i
]);
// Re-create the operation with the same opcode and operands but an i1 result.
12125 DAG
.ReplaceAllUsesOfValueWith(PromOp
,
12126 DAG
.getNode(PromOp
.getOpcode(), dl
, MVT::i1
, Ops
));
12129 // Now we're left with the initial truncation itself.
12130 if (N
->getOpcode() == ISD::TRUNCATE
)
12131 return N
->getOperand(0);
12133 // Otherwise, this is a comparison. The operands to be compared have just
12134 // changed type (to i1), but everything else is the same.
12135 return SDValue(N
, 0);
12138 SDValue
PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode
*N
,
12139 DAGCombinerInfo
&DCI
) const {
12140 SelectionDAG
&DAG
= DCI
.DAG
;
12143 // If we're tracking CR bits, we need to be careful that we don't have:
12144 // zext(binary-ops(trunc(x), trunc(y)))
12146 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12147 // such that we're unnecessarily moving things into CR bits that can more
12148 // efficiently stay in GPRs. Note that if we're not certain that the high
12149 // bits are set as required by the final extension, we still may need to do
12150 // some masking to get the proper behavior.
12152 // This same functionality is important on PPC64 when dealing with
12153 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12154 // the return values of functions. Because it is so similar, it is handled
12157 if (N
->getValueType(0) != MVT::i32
&&
12158 N
->getValueType(0) != MVT::i64
)
12161 if (!((N
->getOperand(0).getValueType() == MVT::i1
&& Subtarget
.useCRBits()) ||
12162 (N
->getOperand(0).getValueType() == MVT::i32
&& Subtarget
.isPPC64())))
12165 if (N
->getOperand(0).getOpcode() != ISD::AND
&&
12166 N
->getOperand(0).getOpcode() != ISD::OR
&&
12167 N
->getOperand(0).getOpcode() != ISD::XOR
&&
12168 N
->getOperand(0).getOpcode() != ISD::SELECT
&&
12169 N
->getOperand(0).getOpcode() != ISD::SELECT_CC
)
12172 SmallVector
<SDValue
, 4> Inputs
;
12173 SmallVector
<SDValue
, 8> BinOps(1, N
->getOperand(0)), PromOps
;
12174 SmallPtrSet
<SDNode
*, 16> Visited
;
12176 // Visit all inputs, collect all binary operations (and, or, xor and
12177 // select) that are all fed by truncations.
12178 while (!BinOps
.empty()) {
12179 SDValue BinOp
= BinOps
.back();
12182 if (!Visited
.insert(BinOp
.getNode()).second
)
12185 PromOps
.push_back(BinOp
);
12187 for (unsigned i
= 0, ie
= BinOp
.getNumOperands(); i
!= ie
; ++i
) {
12188 // The condition of the select is not promoted.
12189 if (BinOp
.getOpcode() == ISD::SELECT
&& i
== 0)
12191 if (BinOp
.getOpcode() == ISD::SELECT_CC
&& i
!= 2 && i
!= 3)
12194 if (BinOp
.getOperand(i
).getOpcode() == ISD::TRUNCATE
||
12195 isa
<ConstantSDNode
>(BinOp
.getOperand(i
))) {
12196 Inputs
.push_back(BinOp
.getOperand(i
));
12197 } else if (BinOp
.getOperand(i
).getOpcode() == ISD::AND
||
12198 BinOp
.getOperand(i
).getOpcode() == ISD::OR
||
12199 BinOp
.getOperand(i
).getOpcode() == ISD::XOR
||
12200 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT
||
12201 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT_CC
) {
12202 BinOps
.push_back(BinOp
.getOperand(i
));
12204 // We have an input that is not a truncation or another binary
12205 // operation; we'll abort this transformation.
12211 // The operands of a select that must be truncated when the select is
12212 // promoted because the operand is actually part of the to-be-promoted set.
12213 DenseMap
<SDNode
*, EVT
> SelectTruncOp
[2];
12215 // Make sure that this is a self-contained cluster of operations (which
12216 // is not quite the same thing as saying that everything has only one
12218 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12219 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12222 for (SDNode::use_iterator UI
= Inputs
[i
].getNode()->use_begin(),
12223 UE
= Inputs
[i
].getNode()->use_end();
12225 SDNode
*User
= *UI
;
12226 if (User
!= N
&& !Visited
.count(User
))
12229 // If we're going to promote the non-output-value operand(s) or SELECT or
12230 // SELECT_CC, record them for truncation.
12231 if (User
->getOpcode() == ISD::SELECT
) {
12232 if (User
->getOperand(0) == Inputs
[i
])
12233 SelectTruncOp
[0].insert(std::make_pair(User
,
12234 User
->getOperand(0).getValueType()));
12235 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12236 if (User
->getOperand(0) == Inputs
[i
])
12237 SelectTruncOp
[0].insert(std::make_pair(User
,
12238 User
->getOperand(0).getValueType()));
12239 if (User
->getOperand(1) == Inputs
[i
])
12240 SelectTruncOp
[1].insert(std::make_pair(User
,
12241 User
->getOperand(1).getValueType()));
12246 for (unsigned i
= 0, ie
= PromOps
.size(); i
!= ie
; ++i
) {
12247 for (SDNode::use_iterator UI
= PromOps
[i
].getNode()->use_begin(),
12248 UE
= PromOps
[i
].getNode()->use_end();
12250 SDNode
*User
= *UI
;
12251 if (User
!= N
&& !Visited
.count(User
))
12254 // If we're going to promote the non-output-value operand(s) or SELECT or
12255 // SELECT_CC, record them for truncation.
12256 if (User
->getOpcode() == ISD::SELECT
) {
12257 if (User
->getOperand(0) == PromOps
[i
])
12258 SelectTruncOp
[0].insert(std::make_pair(User
,
12259 User
->getOperand(0).getValueType()));
12260 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12261 if (User
->getOperand(0) == PromOps
[i
])
12262 SelectTruncOp
[0].insert(std::make_pair(User
,
12263 User
->getOperand(0).getValueType()));
12264 if (User
->getOperand(1) == PromOps
[i
])
12265 SelectTruncOp
[1].insert(std::make_pair(User
,
12266 User
->getOperand(1).getValueType()));
12271 unsigned PromBits
= N
->getOperand(0).getValueSizeInBits();
12272 bool ReallyNeedsExt
= false;
12273 if (N
->getOpcode() != ISD::ANY_EXTEND
) {
12274 // If all of the inputs are not already sign/zero extended, then
12275 // we'll still need to do that at the end.
12276 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12277 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12281 Inputs
[i
].getOperand(0).getValueSizeInBits();
12282 assert(PromBits
< OpBits
&& "Truncation not to a smaller bit count?");
12284 if ((N
->getOpcode() == ISD::ZERO_EXTEND
&&
12285 !DAG
.MaskedValueIsZero(Inputs
[i
].getOperand(0),
12286 APInt::getHighBitsSet(OpBits
,
12287 OpBits
-PromBits
))) ||
12288 (N
->getOpcode() == ISD::SIGN_EXTEND
&&
12289 DAG
.ComputeNumSignBits(Inputs
[i
].getOperand(0)) <
12290 (OpBits
-(PromBits
-1)))) {
12291 ReallyNeedsExt
= true;
12297 // Replace all inputs, either with the truncation operand, or a
12298 // truncation or extension to the final output type.
12299 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12300 // Constant inputs need to be replaced with the to-be-promoted nodes that
12301 // use them because they might have users outside of the cluster of
12303 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12306 SDValue InSrc
= Inputs
[i
].getOperand(0);
12307 if (Inputs
[i
].getValueType() == N
->getValueType(0))
12308 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
], InSrc
);
12309 else if (N
->getOpcode() == ISD::SIGN_EXTEND
)
12310 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12311 DAG
.getSExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12312 else if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12313 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12314 DAG
.getZExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12316 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12317 DAG
.getAnyExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12320 std::list
<HandleSDNode
> PromOpHandles
;
12321 for (auto &PromOp
: PromOps
)
12322 PromOpHandles
.emplace_back(PromOp
);
12324 // Replace all operations (these are all the same, but have a different
12325 // (promoted) return type). DAG.getNode will validate that the types of
12326 // a binary operator match, so go through the list in reverse so that
12327 // we've likely promoted both operands first.
12328 while (!PromOpHandles
.empty()) {
12329 SDValue PromOp
= PromOpHandles
.back().getValue();
12330 PromOpHandles
.pop_back();
12333 switch (PromOp
.getOpcode()) {
12334 default: C
= 0; break;
12335 case ISD::SELECT
: C
= 1; break;
12336 case ISD::SELECT_CC
: C
= 2; break;
12339 if ((!isa
<ConstantSDNode
>(PromOp
.getOperand(C
)) &&
12340 PromOp
.getOperand(C
).getValueType() != N
->getValueType(0)) ||
12341 (!isa
<ConstantSDNode
>(PromOp
.getOperand(C
+1)) &&
12342 PromOp
.getOperand(C
+1).getValueType() != N
->getValueType(0))) {
12343 // The to-be-promoted operands of this node have not yet been
12344 // promoted (this should be rare because we're going through the
12345 // list backward, but if one of the operands has several users in
12346 // this cluster of to-be-promoted nodes, it is possible).
12347 PromOpHandles
.emplace_front(PromOp
);
12351 // For SELECT and SELECT_CC nodes, we do a similar check for any
12352 // to-be-promoted comparison inputs.
12353 if (PromOp
.getOpcode() == ISD::SELECT
||
12354 PromOp
.getOpcode() == ISD::SELECT_CC
) {
12355 if ((SelectTruncOp
[0].count(PromOp
.getNode()) &&
12356 PromOp
.getOperand(0).getValueType() != N
->getValueType(0)) ||
12357 (SelectTruncOp
[1].count(PromOp
.getNode()) &&
12358 PromOp
.getOperand(1).getValueType() != N
->getValueType(0))) {
12359 PromOpHandles
.emplace_front(PromOp
);
12364 SmallVector
<SDValue
, 3> Ops(PromOp
.getNode()->op_begin(),
12365 PromOp
.getNode()->op_end());
12367 // If this node has constant inputs, then they'll need to be promoted here.
12368 for (unsigned i
= 0; i
< 2; ++i
) {
12369 if (!isa
<ConstantSDNode
>(Ops
[C
+i
]))
12371 if (Ops
[C
+i
].getValueType() == N
->getValueType(0))
12374 if (N
->getOpcode() == ISD::SIGN_EXTEND
)
12375 Ops
[C
+i
] = DAG
.getSExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12376 else if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12377 Ops
[C
+i
] = DAG
.getZExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12379 Ops
[C
+i
] = DAG
.getAnyExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12382 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12383 // truncate them again to the original value type.
12384 if (PromOp
.getOpcode() == ISD::SELECT
||
12385 PromOp
.getOpcode() == ISD::SELECT_CC
) {
12386 auto SI0
= SelectTruncOp
[0].find(PromOp
.getNode());
12387 if (SI0
!= SelectTruncOp
[0].end())
12388 Ops
[0] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI0
->second
, Ops
[0]);
12389 auto SI1
= SelectTruncOp
[1].find(PromOp
.getNode());
12390 if (SI1
!= SelectTruncOp
[1].end())
12391 Ops
[1] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI1
->second
, Ops
[1]);
12394 DAG
.ReplaceAllUsesOfValueWith(PromOp
,
12395 DAG
.getNode(PromOp
.getOpcode(), dl
, N
->getValueType(0), Ops
));
12398 // Now we're left with the initial extension itself.
12399 if (!ReallyNeedsExt
)
12400 return N
->getOperand(0);
12402 // To zero extend, just mask off everything except for the first bit (in the
12404 if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12405 return DAG
.getNode(ISD::AND
, dl
, N
->getValueType(0), N
->getOperand(0),
12406 DAG
.getConstant(APInt::getLowBitsSet(
12407 N
->getValueSizeInBits(0), PromBits
),
12408 dl
, N
->getValueType(0)));
12410 assert(N
->getOpcode() == ISD::SIGN_EXTEND
&&
12411 "Invalid extension type");
12412 EVT ShiftAmountTy
= getShiftAmountTy(N
->getValueType(0), DAG
.getDataLayout());
12414 DAG
.getConstant(N
->getValueSizeInBits(0) - PromBits
, dl
, ShiftAmountTy
);
12415 return DAG
.getNode(
12416 ISD::SRA
, dl
, N
->getValueType(0),
12417 DAG
.getNode(ISD::SHL
, dl
, N
->getValueType(0), N
->getOperand(0), ShiftCst
),
12421 SDValue
PPCTargetLowering::combineSetCC(SDNode
*N
,
12422 DAGCombinerInfo
&DCI
) const {
12423 assert(N
->getOpcode() == ISD::SETCC
&&
12424 "Should be called with a SETCC node");
12426 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
12427 if (CC
== ISD::SETNE
|| CC
== ISD::SETEQ
) {
12428 SDValue LHS
= N
->getOperand(0);
12429 SDValue RHS
= N
->getOperand(1);
12431 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12432 if (LHS
.getOpcode() == ISD::SUB
&& isNullConstant(LHS
.getOperand(0)) &&
12434 std::swap(LHS
, RHS
);
12436 // x == 0-y --> x+y == 0
12437 // x != 0-y --> x+y != 0
12438 if (RHS
.getOpcode() == ISD::SUB
&& isNullConstant(RHS
.getOperand(0)) &&
12441 SelectionDAG
&DAG
= DCI
.DAG
;
12442 EVT VT
= N
->getValueType(0);
12443 EVT OpVT
= LHS
.getValueType();
12444 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, OpVT
, LHS
, RHS
.getOperand(1));
12445 return DAG
.getSetCC(DL
, VT
, Add
, DAG
.getConstant(0, DL
, OpVT
), CC
);
12449 return DAGCombineTruncBoolExt(N
, DCI
);
12452 // Is this an extending load from an f32 to an f64?
12453 static bool isFPExtLoad(SDValue Op
) {
12454 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Op
.getNode()))
12455 return LD
->getExtensionType() == ISD::EXTLOAD
&&
12456 Op
.getValueType() == MVT::f64
;
12460 /// Reduces the number of fp-to-int conversion when building a vector.
12462 /// If this vector is built out of floating to integer conversions,
12463 /// transform it to a vector built out of floating point values followed by a
12464 /// single floating to integer conversion of the vector.
12465 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
12466 /// becomes (fptosi (build_vector ($A, $B, ...)))
12467 SDValue
PPCTargetLowering::
12468 combineElementTruncationToVectorTruncation(SDNode
*N
,
12469 DAGCombinerInfo
&DCI
) const {
12470 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12471 "Should be called with a BUILD_VECTOR node");
12473 SelectionDAG
&DAG
= DCI
.DAG
;
12476 SDValue FirstInput
= N
->getOperand(0);
12477 assert(FirstInput
.getOpcode() == PPCISD::MFVSR
&&
12478 "The input operand must be an fp-to-int conversion.");
12480 // This combine happens after legalization so the fp_to_[su]i nodes are
12481 // already converted to PPCSISD nodes.
12482 unsigned FirstConversion
= FirstInput
.getOperand(0).getOpcode();
12483 if (FirstConversion
== PPCISD::FCTIDZ
||
12484 FirstConversion
== PPCISD::FCTIDUZ
||
12485 FirstConversion
== PPCISD::FCTIWZ
||
12486 FirstConversion
== PPCISD::FCTIWUZ
) {
12487 bool IsSplat
= true;
12488 bool Is32Bit
= FirstConversion
== PPCISD::FCTIWZ
||
12489 FirstConversion
== PPCISD::FCTIWUZ
;
12490 EVT SrcVT
= FirstInput
.getOperand(0).getValueType();
12491 SmallVector
<SDValue
, 4> Ops
;
12492 EVT TargetVT
= N
->getValueType(0);
12493 for (int i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
) {
12494 SDValue NextOp
= N
->getOperand(i
);
12495 if (NextOp
.getOpcode() != PPCISD::MFVSR
)
12497 unsigned NextConversion
= NextOp
.getOperand(0).getOpcode();
12498 if (NextConversion
!= FirstConversion
)
12500 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12501 // This is not valid if the input was originally double precision. It is
12502 // also not profitable to do unless this is an extending load in which
12503 // case doing this combine will allow us to combine consecutive loads.
12504 if (Is32Bit
&& !isFPExtLoad(NextOp
.getOperand(0).getOperand(0)))
12506 if (N
->getOperand(i
) != FirstInput
)
12510 // If this is a splat, we leave it as-is since there will be only a single
12511 // fp-to-int conversion followed by a splat of the integer. This is better
12512 // for 32-bit and smaller ints and neutral for 64-bit ints.
12516 // Now that we know we have the right type of node, get its operands
12517 for (int i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
) {
12518 SDValue In
= N
->getOperand(i
).getOperand(0);
12520 // For 32-bit values, we need to add an FP_ROUND node (if we made it
12521 // here, we know that all inputs are extending loads so this is safe).
12523 Ops
.push_back(DAG
.getUNDEF(SrcVT
));
12525 SDValue Trunc
= DAG
.getNode(ISD::FP_ROUND
, dl
,
12526 MVT::f32
, In
.getOperand(0),
12527 DAG
.getIntPtrConstant(1, dl
));
12528 Ops
.push_back(Trunc
);
12531 Ops
.push_back(In
.isUndef() ? DAG
.getUNDEF(SrcVT
) : In
.getOperand(0));
12535 if (FirstConversion
== PPCISD::FCTIDZ
||
12536 FirstConversion
== PPCISD::FCTIWZ
)
12537 Opcode
= ISD::FP_TO_SINT
;
12539 Opcode
= ISD::FP_TO_UINT
;
12541 EVT NewVT
= TargetVT
== MVT::v2i64
? MVT::v2f64
: MVT::v4f32
;
12542 SDValue BV
= DAG
.getBuildVector(NewVT
, dl
, Ops
);
12543 return DAG
.getNode(Opcode
, dl
, TargetVT
, BV
);
12548 /// Reduce the number of loads when building a vector.
12550 /// Building a vector out of multiple loads can be converted to a load
12551 /// of the vector type if the loads are consecutive. If the loads are
12552 /// consecutive but in descending order, a shuffle is added at the end
12553 /// to reorder the vector.
12554 static SDValue
combineBVOfConsecutiveLoads(SDNode
*N
, SelectionDAG
&DAG
) {
12555 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12556 "Should be called with a BUILD_VECTOR node");
12560 // Return early for non byte-sized type, as they can't be consecutive.
12561 if (!N
->getValueType(0).getVectorElementType().isByteSized())
12564 bool InputsAreConsecutiveLoads
= true;
12565 bool InputsAreReverseConsecutive
= true;
12566 unsigned ElemSize
= N
->getValueType(0).getScalarType().getStoreSize();
12567 SDValue FirstInput
= N
->getOperand(0);
12568 bool IsRoundOfExtLoad
= false;
12570 if (FirstInput
.getOpcode() == ISD::FP_ROUND
&&
12571 FirstInput
.getOperand(0).getOpcode() == ISD::LOAD
) {
12572 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(FirstInput
.getOperand(0));
12573 IsRoundOfExtLoad
= LD
->getExtensionType() == ISD::EXTLOAD
;
12575 // Not a build vector of (possibly fp_rounded) loads.
12576 if ((!IsRoundOfExtLoad
&& FirstInput
.getOpcode() != ISD::LOAD
) ||
12577 N
->getNumOperands() == 1)
12580 for (int i
= 1, e
= N
->getNumOperands(); i
< e
; ++i
) {
12581 // If any inputs are fp_round(extload), they all must be.
12582 if (IsRoundOfExtLoad
&& N
->getOperand(i
).getOpcode() != ISD::FP_ROUND
)
12585 SDValue NextInput
= IsRoundOfExtLoad
? N
->getOperand(i
).getOperand(0) :
12587 if (NextInput
.getOpcode() != ISD::LOAD
)
12590 SDValue PreviousInput
=
12591 IsRoundOfExtLoad
? N
->getOperand(i
-1).getOperand(0) : N
->getOperand(i
-1);
12592 LoadSDNode
*LD1
= dyn_cast
<LoadSDNode
>(PreviousInput
);
12593 LoadSDNode
*LD2
= dyn_cast
<LoadSDNode
>(NextInput
);
12595 // If any inputs are fp_round(extload), they all must be.
12596 if (IsRoundOfExtLoad
&& LD2
->getExtensionType() != ISD::EXTLOAD
)
12599 if (!isConsecutiveLS(LD2
, LD1
, ElemSize
, 1, DAG
))
12600 InputsAreConsecutiveLoads
= false;
12601 if (!isConsecutiveLS(LD1
, LD2
, ElemSize
, 1, DAG
))
12602 InputsAreReverseConsecutive
= false;
12604 // Exit early if the loads are neither consecutive nor reverse consecutive.
12605 if (!InputsAreConsecutiveLoads
&& !InputsAreReverseConsecutive
)
12609 assert(!(InputsAreConsecutiveLoads
&& InputsAreReverseConsecutive
) &&
12610 "The loads cannot be both consecutive and reverse consecutive.");
12612 SDValue FirstLoadOp
=
12613 IsRoundOfExtLoad
? FirstInput
.getOperand(0) : FirstInput
;
12614 SDValue LastLoadOp
=
12615 IsRoundOfExtLoad
? N
->getOperand(N
->getNumOperands()-1).getOperand(0) :
12616 N
->getOperand(N
->getNumOperands()-1);
12618 LoadSDNode
*LD1
= dyn_cast
<LoadSDNode
>(FirstLoadOp
);
12619 LoadSDNode
*LDL
= dyn_cast
<LoadSDNode
>(LastLoadOp
);
12620 if (InputsAreConsecutiveLoads
) {
12621 assert(LD1
&& "Input needs to be a LoadSDNode.");
12622 return DAG
.getLoad(N
->getValueType(0), dl
, LD1
->getChain(),
12623 LD1
->getBasePtr(), LD1
->getPointerInfo(),
12624 LD1
->getAlignment());
12626 if (InputsAreReverseConsecutive
) {
12627 assert(LDL
&& "Input needs to be a LoadSDNode.");
12628 SDValue Load
= DAG
.getLoad(N
->getValueType(0), dl
, LDL
->getChain(),
12629 LDL
->getBasePtr(), LDL
->getPointerInfo(),
12630 LDL
->getAlignment());
12631 SmallVector
<int, 16> Ops
;
12632 for (int i
= N
->getNumOperands() - 1; i
>= 0; i
--)
12635 return DAG
.getVectorShuffle(N
->getValueType(0), dl
, Load
,
12636 DAG
.getUNDEF(N
->getValueType(0)), Ops
);
12641 // This function adds the required vector_shuffle needed to get
12642 // the elements of the vector extract in the correct position
12643 // as specified by the CorrectElems encoding.
12644 static SDValue
addShuffleForVecExtend(SDNode
*N
, SelectionDAG
&DAG
,
12645 SDValue Input
, uint64_t Elems
,
12646 uint64_t CorrectElems
) {
12649 unsigned NumElems
= Input
.getValueType().getVectorNumElements();
12650 SmallVector
<int, 16> ShuffleMask(NumElems
, -1);
12652 // Knowing the element indices being extracted from the original
12653 // vector and the order in which they're being inserted, just put
12654 // them at element indices required for the instruction.
12655 for (unsigned i
= 0; i
< N
->getNumOperands(); i
++) {
12656 if (DAG
.getDataLayout().isLittleEndian())
12657 ShuffleMask
[CorrectElems
& 0xF] = Elems
& 0xF;
12659 ShuffleMask
[(CorrectElems
& 0xF0) >> 4] = (Elems
& 0xF0) >> 4;
12660 CorrectElems
= CorrectElems
>> 8;
12661 Elems
= Elems
>> 8;
12665 DAG
.getVectorShuffle(Input
.getValueType(), dl
, Input
,
12666 DAG
.getUNDEF(Input
.getValueType()), ShuffleMask
);
12668 EVT Ty
= N
->getValueType(0);
12669 SDValue BV
= DAG
.getNode(PPCISD::SExtVElems
, dl
, Ty
, Shuffle
);
12673 // Look for build vector patterns where input operands come from sign
12674 // extended vector_extract elements of specific indices. If the correct indices
12675 // aren't used, add a vector shuffle to fix up the indices and create a new
12676 // PPCISD:SExtVElems node which selects the vector sign extend instructions
12677 // during instruction selection.
12678 static SDValue
combineBVOfVecSExt(SDNode
*N
, SelectionDAG
&DAG
) {
12679 // This array encodes the indices that the vector sign extend instructions
12680 // extract from when extending from one type to another for both BE and LE.
12681 // The right nibble of each byte corresponds to the LE incides.
12682 // and the left nibble of each byte corresponds to the BE incides.
12683 // For example: 0x3074B8FC byte->word
12684 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12685 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12686 // For example: 0x000070F8 byte->double word
12687 // For LE: the allowed indices are: 0x0,0x8
12688 // For BE: the allowed indices are: 0x7,0xF
12689 uint64_t TargetElems
[] = {
12690 0x3074B8FC, // b->w
12691 0x000070F8, // b->d
12692 0x10325476, // h->w
12693 0x00003074, // h->d
12694 0x00001032, // w->d
12697 uint64_t Elems
= 0;
12701 auto isSExtOfVecExtract
= [&](SDValue Op
) -> bool {
12704 if (Op
.getOpcode() != ISD::SIGN_EXTEND
&&
12705 Op
.getOpcode() != ISD::SIGN_EXTEND_INREG
)
12708 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12709 // of the right width.
12710 SDValue Extract
= Op
.getOperand(0);
12711 if (Extract
.getOpcode() == ISD::ANY_EXTEND
)
12712 Extract
= Extract
.getOperand(0);
12713 if (Extract
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
12716 ConstantSDNode
*ExtOp
= dyn_cast
<ConstantSDNode
>(Extract
.getOperand(1));
12720 Index
= ExtOp
->getZExtValue();
12721 if (Input
&& Input
!= Extract
.getOperand(0))
12725 Input
= Extract
.getOperand(0);
12727 Elems
= Elems
<< 8;
12728 Index
= DAG
.getDataLayout().isLittleEndian() ? Index
: Index
<< 4;
12734 // If the build vector operands aren't sign extended vector extracts,
12735 // of the same input vector, then return.
12736 for (unsigned i
= 0; i
< N
->getNumOperands(); i
++) {
12737 if (!isSExtOfVecExtract(N
->getOperand(i
))) {
12742 // If the vector extract indicies are not correct, add the appropriate
12744 int TgtElemArrayIdx
;
12745 int InputSize
= Input
.getValueType().getScalarSizeInBits();
12746 int OutputSize
= N
->getValueType(0).getScalarSizeInBits();
12747 if (InputSize
+ OutputSize
== 40)
12748 TgtElemArrayIdx
= 0;
12749 else if (InputSize
+ OutputSize
== 72)
12750 TgtElemArrayIdx
= 1;
12751 else if (InputSize
+ OutputSize
== 48)
12752 TgtElemArrayIdx
= 2;
12753 else if (InputSize
+ OutputSize
== 80)
12754 TgtElemArrayIdx
= 3;
12755 else if (InputSize
+ OutputSize
== 96)
12756 TgtElemArrayIdx
= 4;
12760 uint64_t CorrectElems
= TargetElems
[TgtElemArrayIdx
];
12761 CorrectElems
= DAG
.getDataLayout().isLittleEndian()
12762 ? CorrectElems
& 0x0F0F0F0F0F0F0F0F
12763 : CorrectElems
& 0xF0F0F0F0F0F0F0F0;
12764 if (Elems
!= CorrectElems
) {
12765 return addShuffleForVecExtend(N
, DAG
, Input
, Elems
, CorrectElems
);
12768 // Regular lowering will catch cases where a shuffle is not needed.
12772 SDValue
PPCTargetLowering::DAGCombineBuildVector(SDNode
*N
,
12773 DAGCombinerInfo
&DCI
) const {
12774 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12775 "Should be called with a BUILD_VECTOR node");
12777 SelectionDAG
&DAG
= DCI
.DAG
;
12780 if (!Subtarget
.hasVSX())
12783 // The target independent DAG combiner will leave a build_vector of
12784 // float-to-int conversions intact. We can generate MUCH better code for
12785 // a float-to-int conversion of a vector of floats.
12786 SDValue FirstInput
= N
->getOperand(0);
12787 if (FirstInput
.getOpcode() == PPCISD::MFVSR
) {
12788 SDValue Reduced
= combineElementTruncationToVectorTruncation(N
, DCI
);
12793 // If we're building a vector out of consecutive loads, just load that
12795 SDValue Reduced
= combineBVOfConsecutiveLoads(N
, DAG
);
12799 // If we're building a vector out of extended elements from another vector
12800 // we have P9 vector integer extend instructions. The code assumes legal
12801 // input types (i.e. it can't handle things like v4i16) so do not run before
12803 if (Subtarget
.hasP9Altivec() && !DCI
.isBeforeLegalize()) {
12804 Reduced
= combineBVOfVecSExt(N
, DAG
);
12810 if (N
->getValueType(0) != MVT::v2f64
)
12814 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
12815 if (FirstInput
.getOpcode() != ISD::SINT_TO_FP
&&
12816 FirstInput
.getOpcode() != ISD::UINT_TO_FP
)
12818 if (N
->getOperand(1).getOpcode() != ISD::SINT_TO_FP
&&
12819 N
->getOperand(1).getOpcode() != ISD::UINT_TO_FP
)
12821 if (FirstInput
.getOpcode() != N
->getOperand(1).getOpcode())
12824 SDValue Ext1
= FirstInput
.getOperand(0);
12825 SDValue Ext2
= N
->getOperand(1).getOperand(0);
12826 if(Ext1
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
||
12827 Ext2
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
12830 ConstantSDNode
*Ext1Op
= dyn_cast
<ConstantSDNode
>(Ext1
.getOperand(1));
12831 ConstantSDNode
*Ext2Op
= dyn_cast
<ConstantSDNode
>(Ext2
.getOperand(1));
12832 if (!Ext1Op
|| !Ext2Op
)
12834 if (Ext1
.getOperand(0).getValueType() != MVT::v4i32
||
12835 Ext1
.getOperand(0) != Ext2
.getOperand(0))
12838 int FirstElem
= Ext1Op
->getZExtValue();
12839 int SecondElem
= Ext2Op
->getZExtValue();
12841 if (FirstElem
== 0 && SecondElem
== 1)
12842 SubvecIdx
= Subtarget
.isLittleEndian() ? 1 : 0;
12843 else if (FirstElem
== 2 && SecondElem
== 3)
12844 SubvecIdx
= Subtarget
.isLittleEndian() ? 0 : 1;
12848 SDValue SrcVec
= Ext1
.getOperand(0);
12849 auto NodeType
= (N
->getOperand(1).getOpcode() == ISD::SINT_TO_FP
) ?
12850 PPCISD::SINT_VEC_TO_FP
: PPCISD::UINT_VEC_TO_FP
;
12851 return DAG
.getNode(NodeType
, dl
, MVT::v2f64
,
12852 SrcVec
, DAG
.getIntPtrConstant(SubvecIdx
, dl
));
12855 SDValue
PPCTargetLowering::combineFPToIntToFP(SDNode
*N
,
12856 DAGCombinerInfo
&DCI
) const {
12857 assert((N
->getOpcode() == ISD::SINT_TO_FP
||
12858 N
->getOpcode() == ISD::UINT_TO_FP
) &&
12859 "Need an int -> FP conversion node here");
12861 if (useSoftFloat() || !Subtarget
.has64BitSupport())
12864 SelectionDAG
&DAG
= DCI
.DAG
;
12868 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12869 // from the hardware.
12870 if (Op
.getValueType() != MVT::f32
&& Op
.getValueType() != MVT::f64
)
12872 if (Op
.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1
) ||
12873 Op
.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64
))
12876 SDValue
FirstOperand(Op
.getOperand(0));
12877 bool SubWordLoad
= FirstOperand
.getOpcode() == ISD::LOAD
&&
12878 (FirstOperand
.getValueType() == MVT::i8
||
12879 FirstOperand
.getValueType() == MVT::i16
);
12880 if (Subtarget
.hasP9Vector() && Subtarget
.hasP9Altivec() && SubWordLoad
) {
12881 bool Signed
= N
->getOpcode() == ISD::SINT_TO_FP
;
12882 bool DstDouble
= Op
.getValueType() == MVT::f64
;
12883 unsigned ConvOp
= Signed
?
12884 (DstDouble
? PPCISD::FCFID
: PPCISD::FCFIDS
) :
12885 (DstDouble
? PPCISD::FCFIDU
: PPCISD::FCFIDUS
);
12886 SDValue WidthConst
=
12887 DAG
.getIntPtrConstant(FirstOperand
.getValueType() == MVT::i8
? 1 : 2,
12889 LoadSDNode
*LDN
= cast
<LoadSDNode
>(FirstOperand
.getNode());
12890 SDValue Ops
[] = { LDN
->getChain(), LDN
->getBasePtr(), WidthConst
};
12891 SDValue Ld
= DAG
.getMemIntrinsicNode(PPCISD::LXSIZX
, dl
,
12892 DAG
.getVTList(MVT::f64
, MVT::Other
),
12893 Ops
, MVT::i8
, LDN
->getMemOperand());
12895 // For signed conversion, we need to sign-extend the value in the VSR
12897 SDValue ExtOps
[] = { Ld
, WidthConst
};
12898 SDValue Ext
= DAG
.getNode(PPCISD::VEXTS
, dl
, MVT::f64
, ExtOps
);
12899 return DAG
.getNode(ConvOp
, dl
, DstDouble
? MVT::f64
: MVT::f32
, Ext
);
12901 return DAG
.getNode(ConvOp
, dl
, DstDouble
? MVT::f64
: MVT::f32
, Ld
);
12905 // For i32 intermediate values, unfortunately, the conversion functions
12906 // leave the upper 32 bits of the value are undefined. Within the set of
12907 // scalar instructions, we have no method for zero- or sign-extending the
12908 // value. Thus, we cannot handle i32 intermediate values here.
12909 if (Op
.getOperand(0).getValueType() == MVT::i32
)
12912 assert((Op
.getOpcode() == ISD::SINT_TO_FP
|| Subtarget
.hasFPCVT()) &&
12913 "UINT_TO_FP is supported only with FPCVT");
12915 // If we have FCFIDS, then use it when converting to single-precision.
12916 // Otherwise, convert to double-precision and then round.
12917 unsigned FCFOp
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
12918 ? (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDUS
12920 : (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDU
12922 MVT FCFTy
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
12926 // If we're converting from a float, to an int, and back to a float again,
12927 // then we don't need the store/load pair at all.
12928 if ((Op
.getOperand(0).getOpcode() == ISD::FP_TO_UINT
&&
12929 Subtarget
.hasFPCVT()) ||
12930 (Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
)) {
12931 SDValue Src
= Op
.getOperand(0).getOperand(0);
12932 if (Src
.getValueType() == MVT::f32
) {
12933 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
12934 DCI
.AddToWorklist(Src
.getNode());
12935 } else if (Src
.getValueType() != MVT::f64
) {
12936 // Make sure that we don't pick up a ppc_fp128 source value.
12941 Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
12944 SDValue Tmp
= DAG
.getNode(FCTOp
, dl
, MVT::f64
, Src
);
12945 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Tmp
);
12947 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT()) {
12948 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
,
12949 MVT::f32
, FP
, DAG
.getIntPtrConstant(0, dl
));
12950 DCI
.AddToWorklist(FP
.getNode());
12959 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12960 // builtins) into loads with swaps.
12961 SDValue
PPCTargetLowering::expandVSXLoadForLE(SDNode
*N
,
12962 DAGCombinerInfo
&DCI
) const {
12963 SelectionDAG
&DAG
= DCI
.DAG
;
12967 MachineMemOperand
*MMO
;
12969 switch (N
->getOpcode()) {
12971 llvm_unreachable("Unexpected opcode for little endian VSX load");
12973 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
12974 Chain
= LD
->getChain();
12975 Base
= LD
->getBasePtr();
12976 MMO
= LD
->getMemOperand();
12977 // If the MMO suggests this isn't a load of a full vector, leave
12978 // things alone. For a built-in, we have to make the change for
12979 // correctness, so if there is a size problem that will be a bug.
12980 if (MMO
->getSize() < 16)
12984 case ISD::INTRINSIC_W_CHAIN
: {
12985 MemIntrinsicSDNode
*Intrin
= cast
<MemIntrinsicSDNode
>(N
);
12986 Chain
= Intrin
->getChain();
12987 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12988 // us what we want. Get operand 2 instead.
12989 Base
= Intrin
->getOperand(2);
12990 MMO
= Intrin
->getMemOperand();
12995 MVT VecTy
= N
->getValueType(0).getSimpleVT();
12997 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
12998 // aligned and the type is a vector with elements up to 4 bytes
12999 if (Subtarget
.needsSwapsForVSXMemOps() && !(MMO
->getAlignment()%16)
13000 && VecTy
.getScalarSizeInBits() <= 32 ) {
13004 SDValue LoadOps
[] = { Chain
, Base
};
13005 SDValue Load
= DAG
.getMemIntrinsicNode(PPCISD::LXVD2X
, dl
,
13006 DAG
.getVTList(MVT::v2f64
, MVT::Other
),
13007 LoadOps
, MVT::v2f64
, MMO
);
13009 DCI
.AddToWorklist(Load
.getNode());
13010 Chain
= Load
.getValue(1);
13011 SDValue Swap
= DAG
.getNode(
13012 PPCISD::XXSWAPD
, dl
, DAG
.getVTList(MVT::v2f64
, MVT::Other
), Chain
, Load
);
13013 DCI
.AddToWorklist(Swap
.getNode());
13015 // Add a bitcast if the resulting load type doesn't match v2f64.
13016 if (VecTy
!= MVT::v2f64
) {
13017 SDValue N
= DAG
.getNode(ISD::BITCAST
, dl
, VecTy
, Swap
);
13018 DCI
.AddToWorklist(N
.getNode());
13019 // Package {bitcast value, swap's chain} to match Load's shape.
13020 return DAG
.getNode(ISD::MERGE_VALUES
, dl
, DAG
.getVTList(VecTy
, MVT::Other
),
13021 N
, Swap
.getValue(1));
13027 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13028 // builtins) into stores with swaps.
//
// N is inspected by opcode: for a StoreSDNode the chain, base pointer and
// memory operand come from the store itself; for an ISD::INTRINSIC_VOID node
// they come from the MemIntrinsicSDNode (with the address taken from operand
// 3). The stored value is bitcast to v2f64 if needed, run through
// PPCISD::XXSWAPD, and written by a PPCISD::STXVD2X memory-intrinsic node that
// reuses the original memory operand. Newly created nodes are pushed onto the
// combiner worklist via DCI.
//
// NOTE(review): this listing appears to be missing source lines (e.g. the
// declarations of `dl`, `Chain`, `Base` and `SrcOpnd`, several case labels and
// the early-exit/return statements); confirm against the complete file before
// changing any code here.
13029 SDValue
PPCTargetLowering::expandVSXStoreForLE(SDNode
*N
,
13030 DAGCombinerInfo
&DCI
) const {
13031 SelectionDAG
&DAG
= DCI
.DAG
;
13036 MachineMemOperand
*MMO
;
13038 switch (N
->getOpcode()) {
13040 llvm_unreachable("Unexpected opcode for little endian VSX store");
// Plain store: take chain, address and memory operand from the StoreSDNode.
13042 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
13043 Chain
= ST
->getChain();
13044 Base
= ST
->getBasePtr();
13045 MMO
= ST
->getMemOperand();
13047 // If the MMO suggests this isn't a store of a full vector, leave
13048 // things alone. For a built-in, we have to make the change for
13049 // correctness, so if there is a size problem that will be a bug.
13050 if (MMO
->getSize() < 16)
// Store intrinsic: same information, read from the MemIntrinsicSDNode.
13054 case ISD::INTRINSIC_VOID
: {
13055 MemIntrinsicSDNode
*Intrin
= cast
<MemIntrinsicSDNode
>(N
);
13056 Chain
= Intrin
->getChain();
13057 // Intrin->getBasePtr() oddly does not get what we want.
// The address is read from operand 3 of the intrinsic node instead.
13058 Base
= Intrin
->getOperand(3);
13059 MMO
= Intrin
->getMemOperand();
13065 SDValue Src
= N
->getOperand(SrcOpnd
);
13066 MVT VecTy
= Src
.getValueType().getSimpleVT();
13068 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13069 // 16-byte aligned and the type is a vector with elements up to 4 bytes.
13070 if (Subtarget
.needsSwapsForVSXMemOps() && !(MMO
->getAlignment()%16)
13071 && VecTy
.getScalarSizeInBits() <= 32 ) {
13075 // All stores are done as v2f64, with a bitcast of the source if needed.
13076 if (VecTy
!= MVT::v2f64
) {
13077 Src
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2f64
, Src
);
13078 DCI
.AddToWorklist(Src
.getNode());
// The swap node is chained: it consumes Chain and produces a new chain as its
// result #1, which then feeds the store below.
13081 SDValue Swap
= DAG
.getNode(PPCISD::XXSWAPD
, dl
,
13082 DAG
.getVTList(MVT::v2f64
, MVT::Other
), Chain
, Src
);
13083 DCI
.AddToWorklist(Swap
.getNode());
13084 Chain
= Swap
.getValue(1);
// Emit the store of the swapped value; the memory VT stays the original
// vector type and the original memory operand is reused.
13085 SDValue StoreOps
[] = { Chain
, Swap
, Base
};
13086 SDValue Store
= DAG
.getMemIntrinsicNode(PPCISD::STXVD2X
, dl
,
13087 DAG
.getVTList(MVT::Other
),
13088 StoreOps
, VecTy
, MMO
);
13089 DCI
.AddToWorklist(Store
.getNode());
13093 // Handle DAG combine for STORE (FP_TO_INT F).
//
// N is a store whose stored value (operand 1) is an FP_TO_SINT/FP_TO_UINT
// node. The floating-point source is (after an f32 -> f64 extension when
// needed) converted with PPCISD::FP_TO_SINT_IN_VSR / FP_TO_UINT_IN_VSR and
// stored through a PPCISD::ST_VSR_SCAL_INT memory-intrinsic node that reuses
// the original store's memory VT and memory operand.
//
// NOTE(review): this listing appears to be missing source lines (e.g. the
// declaration of `dl` and the statements guarded by the early-exit checks, as
// well as the final return); confirm against the complete file.
13094 SDValue
PPCTargetLowering::combineStoreFPToInt(SDNode
*N
,
13095 DAGCombinerInfo
&DCI
) const {
13097 SelectionDAG
&DAG
= DCI
.DAG
;
13099 unsigned Opcode
= N
->getOperand(1).getOpcode();
13101 assert((Opcode
== ISD::FP_TO_SINT
|| Opcode
== ISD::FP_TO_UINT
)
13102 && "Not a FP_TO_INT Instruction!");
// Val is the floating-point input of the conversion; Op1VT is the integer
// type produced by the conversion. Despite its name, ResVT is the type of the
// floating-point *source* value.
13104 SDValue Val
= N
->getOperand(1).getOperand(0);
13105 EVT Op1VT
= N
->getOperand(1).getValueType();
13106 EVT ResVT
= Val
.getValueType();
13108 // Floating point types smaller than 32 bits are not legal on Power.
13109 if (ResVT
.getScalarSizeInBits() < 32)
13112 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13113 bool ValidTypeForStoreFltAsInt
=
13114 (Op1VT
== MVT::i32
|| Op1VT
== MVT::i64
||
13115 (Subtarget
.hasP9Vector() && (Op1VT
== MVT::i16
|| Op1VT
== MVT::i8
)));
// Further preconditions checked here: the source is not ppcf128, the subtarget
// has P8 Altivec, the store is not truncating, and the integer type is one of
// the accepted ones computed above.
13117 if (ResVT
== MVT::ppcf128
|| !Subtarget
.hasP8Altivec() ||
13118 cast
<StoreSDNode
>(N
)->isTruncatingStore() || !ValidTypeForStoreFltAsInt
)
13121 // Extend f32 values to f64
13122 if (ResVT
.getScalarSizeInBits() == 32) {
13123 Val
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Val
);
13124 DCI
.AddToWorklist(Val
.getNode());
13127 // Set signed or unsigned conversion opcode.
13128 unsigned ConvOpcode
= (Opcode
== ISD::FP_TO_SINT
) ?
13129 PPCISD::FP_TO_SINT_IN_VSR
:
13130 PPCISD::FP_TO_UINT_IN_VSR
;
// The conversion node keeps a floating-point result type: f128 for an f128
// source, f64 otherwise.
13132 Val
= DAG
.getNode(ConvOpcode
,
13133 dl
, ResVT
== MVT::f128
? MVT::f128
: MVT::f64
, Val
);
13134 DCI
.AddToWorklist(Val
.getNode());
13136 // Set number of bytes being converted.
13137 unsigned ByteSize
= Op1VT
.getScalarSizeInBits() / 8;
// Operands of the new store node: chain, converted value, address (operand 2
// of the original store), byte size, and the integer value type.
13138 SDValue Ops
[] = { N
->getOperand(0), Val
, N
->getOperand(2),
13139 DAG
.getIntPtrConstant(ByteSize
, dl
, false),
13140 DAG
.getValueType(Op1VT
) };
13142 Val
= DAG
.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT
, dl
,
13143 DAG
.getVTList(MVT::Other
), Ops
,
13144 cast
<StoreSDNode
>(N
)->getMemoryVT(),
13145 cast
<StoreSDNode
>(N
)->getMemOperand());
13147 DCI
.AddToWorklist(Val
.getNode());
// combineVReverseMemOP - Combine a vector shuffle that is paired with a normal
// load or store (LSBase) into a single PPCISD::LOAD_VEC_BE or
// PPCISD::STORE_VEC_BE memory-intrinsic node. The combine is only attempted
// when the shuffle's type is legal, the subtarget is little-endian with VSX
// and P9 vector support, and IsElementReverse(SVN) holds.
//
// NOTE(review): this listing appears to be missing source lines (e.g. the
// declaration of `dl`, the body of the IsElementReverse loop and the
// statements guarded by the early-exit checks); confirm against the complete
// file.
13151 SDValue
PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode
*SVN
,
13152 LSBaseSDNode
*LSBase
,
13153 DAGCombinerInfo
&DCI
) const {
13154 assert((ISD::isNormalLoad(LSBase
) || ISD::isNormalStore(LSBase
)) &&
13155 "Not a reverse memop pattern!");
// Predicate over the shuffle mask; it walks the mask from its last element to
// its first (rbegin -> rend). The per-element check is not visible here.
13157 auto IsElementReverse
= [](const ShuffleVectorSDNode
*SVN
) -> bool {
13158 auto Mask
= SVN
->getMask();
13160 auto I
= Mask
.rbegin();
13161 auto E
= Mask
.rend();
13163 for (; I
!= E
; ++I
) {
13171 SelectionDAG
&DAG
= DCI
.DAG
;
13172 EVT VT
= SVN
->getValueType(0);
13174 if (!isTypeLegal(VT
) || !Subtarget
.isLittleEndian() || !Subtarget
.hasVSX())
13177 // Before P9, the PPCVSXSwapRemoval pass adjusts the element order itself
13178 // (see the comment in PPCVSXSwapRemoval.cpp).
13179 // This combine would conflict with that optimization, so we don't do it.
13180 if (!Subtarget
.hasP9Vector())
13183 if(!IsElementReverse(SVN
))
// Load case: replace with a LOAD_VEC_BE node producing (VT, chain) from the
// original chain and base pointer.
13186 if (LSBase
->getOpcode() == ISD::LOAD
) {
13188 SDValue LoadOps
[] = {LSBase
->getChain(), LSBase
->getBasePtr()};
13189 return DAG
.getMemIntrinsicNode(
13190 PPCISD::LOAD_VEC_BE
, dl
, DAG
.getVTList(VT
, MVT::Other
), LoadOps
,
13191 LSBase
->getMemoryVT(), LSBase
->getMemOperand());
// Store case: store the shuffle's first input (operand 0) directly with a
// STORE_VEC_BE node.
13194 if (LSBase
->getOpcode() == ISD::STORE
) {
13196 SDValue StoreOps
[] = {LSBase
->getChain(), SVN
->getOperand(0),
13197 LSBase
->getBasePtr()};
13198 return DAG
.getMemIntrinsicNode(
13199 PPCISD::STORE_VEC_BE
, dl
, DAG
.getVTList(MVT::Other
), StoreOps
,
13200 LSBase
->getMemoryVT(), LSBase
->getMemOperand());
13203 llvm_unreachable("Expected a load or store node here");
// PerformDAGCombine - Target-specific DAG combine entry point for PowerPC.
// Dispatches on N->getOpcode() and either forwards to a dedicated combine
// helper (combineADD, combineSHL, combineTRUNCATE, ...) or performs the
// rewrite inline (stores of FP_TO_INT / BSWAP / constants, VSX load/store
// swap expansion, unaligned Altivec/QPX vector loads, lvsl/lvsr reuse,
// vmaxs -> abs, byte-swapping loads, VCMPo reuse, and CTR-loop / CR6
// branches).
//
// NOTE(review): this listing appears to be missing many source lines (several
// `case` labels, the declaration of `dl`, `break`/`return` statements and
// closing braces); confirm against the complete file before changing any
// code here.
13206 SDValue
PPCTargetLowering::PerformDAGCombine(SDNode
*N
,
13207 DAGCombinerInfo
&DCI
) const {
13208 SelectionDAG
&DAG
= DCI
.DAG
;
13210 switch (N
->getOpcode()) {
13213 return combineADD(N
, DCI
);
13215 return combineSHL(N
, DCI
);
13217 return combineSRA(N
, DCI
);
13219 return combineSRL(N
, DCI
);
13221 return combineMUL(N
, DCI
);
13223 if (isNullConstant(N
->getOperand(0))) // 0 << V -> 0.
13224 return N
->getOperand(0);
13227 if (isNullConstant(N
->getOperand(0))) // 0 >>u V -> 0.
13228 return N
->getOperand(0);
13231 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(0))) {
13232 if (C
->isNullValue() || // 0 >>s V -> 0.
13233 C
->isAllOnesValue()) // -1 >>s V -> -1.
13234 return N
->getOperand(0);
13237 case ISD::SIGN_EXTEND
:
13238 case ISD::ZERO_EXTEND
:
13239 case ISD::ANY_EXTEND
:
13240 return DAGCombineExtBoolTrunc(N
, DCI
);
13241 case ISD::TRUNCATE
:
13242 return combineTRUNCATE(N
, DCI
);
13244 if (SDValue CSCC
= combineSetCC(N
, DCI
))
13247 case ISD::SELECT_CC
:
13248 return DAGCombineTruncBoolExt(N
, DCI
);
13249 case ISD::SINT_TO_FP
:
13250 case ISD::UINT_TO_FP
:
13251 return combineFPToIntToFP(N
, DCI
);
// A shuffle whose first input is a normal load is handed to
// combineVReverseMemOP together with that load.
13252 case ISD::VECTOR_SHUFFLE
:
13253 if (ISD::isNormalLoad(N
->getOperand(0).getNode())) {
13254 LSBaseSDNode
* LSBase
= cast
<LSBaseSDNode
>(N
->getOperand(0));
13255 return combineVReverseMemOP(cast
<ShuffleVectorSDNode
>(N
), LSBase
, DCI
);
// Store combines: Op1VT/Opcode describe the value being stored (operand 1).
13260 EVT Op1VT
= N
->getOperand(1).getValueType();
13261 unsigned Opcode
= N
->getOperand(1).getOpcode();
13263 if (Opcode
== ISD::FP_TO_SINT
|| Opcode
== ISD::FP_TO_UINT
) {
13264 SDValue Val
= combineStoreFPToInt(N
, DCI
);
13269 if (Opcode
== ISD::VECTOR_SHUFFLE
&& ISD::isNormalStore(N
)) {
13270 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
->getOperand(1));
13271 SDValue Val
= combineVReverseMemOP(SVN
, cast
<LSBaseSDNode
>(N
), DCI
);
13276 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
13277 if (cast
<StoreSDNode
>(N
)->isUnindexed() && Opcode
== ISD::BSWAP
&&
13278 N
->getOperand(1).getNode()->hasOneUse() &&
13279 (Op1VT
== MVT::i32
|| Op1VT
== MVT::i16
||
13280 (Subtarget
.hasLDBRX() && Subtarget
.isPPC64() && Op1VT
== MVT::i64
))) {
13282 // STBRX can only handle simple types, and it makes no sense to store fewer
13283 // than two bytes in byte-reversed order.
13284 EVT mVT
= cast
<StoreSDNode
>(N
)->getMemoryVT();
13285 if (mVT
.isExtended() || mVT
.getSizeInBits() < 16)
13288 SDValue BSwapOp
= N
->getOperand(1).getOperand(0);
13289 // Do an any-extend to 32-bits if this is a half-word input.
13290 if (BSwapOp
.getValueType() == MVT::i16
)
13291 BSwapOp
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, BSwapOp
);
13293 // If the type of the BSWAP operand is wider than the stored memory width,
13294 // it needs to be shifted right before STBRX.
13295 if (Op1VT
.bitsGT(mVT
)) {
13296 int Shift
= Op1VT
.getSizeInBits() - mVT
.getSizeInBits();
13297 BSwapOp
= DAG
.getNode(ISD::SRL
, dl
, Op1VT
, BSwapOp
,
13298 DAG
.getConstant(Shift
, dl
, MVT::i32
));
13299 // Need to truncate if this is a bswap of i64 stored as i32/i16.
13300 if (Op1VT
== MVT::i64
)
13301 BSwapOp
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, BSwapOp
);
13305 N
->getOperand(0), BSwapOp
, N
->getOperand(2), DAG
.getValueType(mVT
)
13308 DAG
.getMemIntrinsicNode(PPCISD::STBRX
, dl
, DAG
.getVTList(MVT::Other
),
13309 Ops
, cast
<StoreSDNode
>(N
)->getMemoryVT(),
13310 cast
<StoreSDNode
>(N
)->getMemOperand());
13313 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
13314 // So it can increase the chance of CSE constant construction.
13315 if (Subtarget
.isPPC64() && !DCI
.isBeforeLegalize() &&
13316 isa
<ConstantSDNode
>(N
->getOperand(1)) && Op1VT
== MVT::i32
) {
13317 // Need to sign-extend to 64 bits to handle negative values.
13318 EVT MemVT
= cast
<StoreSDNode
>(N
)->getMemoryVT();
13319 uint64_t Val64
= SignExtend64(N
->getConstantOperandVal(1),
13320 MemVT
.getSizeInBits());
13321 SDValue Const64
= DAG
.getConstant(Val64
, dl
, MVT::i64
);
13323 // DAG.getTruncStore() can't be used here because it doesn't accept
13324 // the general (base + offset) addressing mode.
13325 // So we use UpdateNodeOperands and setTruncatingStore instead.
13326 DAG
.UpdateNodeOperands(N
, N
->getOperand(0), Const64
, N
->getOperand(2),
13328 cast
<StoreSDNode
>(N
)->setTruncatingStore(true);
13329 return SDValue(N
, 0);
13332 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13333 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13334 if (Op1VT
.isSimple()) {
13335 MVT StoreVT
= Op1VT
.getSimpleVT();
13336 if (Subtarget
.needsSwapsForVSXMemOps() &&
13337 (StoreVT
== MVT::v2f64
|| StoreVT
== MVT::v2i64
||
13338 StoreVT
== MVT::v4f32
|| StoreVT
== MVT::v4i32
))
13339 return expandVSXStoreForLE(N
, DCI
);
// Load combines.
13344 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
13345 EVT VT
= LD
->getValueType(0);
13347 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13348 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13349 if (VT
.isSimple()) {
13350 MVT LoadVT
= VT
.getSimpleVT();
13351 if (Subtarget
.needsSwapsForVSXMemOps() &&
13352 (LoadVT
== MVT::v2f64
|| LoadVT
== MVT::v2i64
||
13353 LoadVT
== MVT::v4f32
|| LoadVT
== MVT::v4i32
))
13354 return expandVSXLoadForLE(N
, DCI
);
13357 // We sometimes end up with a 64-bit integer load, from which we extract
13358 // two single-precision floating-point numbers. This happens with
13359 // std::complex<float>, and other similar structures, because of the way we
13360 // canonicalize structure copies. However, if we lack direct moves,
13361 // then the final bitcasts from the extracted integer values to the
13362 // floating-point numbers turn into store/load pairs. Even with direct moves,
13363 // just loading the two floating-point numbers is likely better.
13364 auto ReplaceTwoFloatLoad
= [&]() {
13365 if (VT
!= MVT::i64
)
13368 if (LD
->getExtensionType() != ISD::NON_EXTLOAD
||
13372 // We're looking for a sequence like this:
13373 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
13374 // t16: i64 = srl t13, Constant:i32<32>
13375 // t17: i32 = truncate t16
13376 // t18: f32 = bitcast t17
13377 // t19: i32 = truncate t13
13378 // t20: f32 = bitcast t19
13380 if (!LD
->hasNUsesOfValue(2, 0))
// Pick the two users of the loaded value (result #0), skipping users of the
// other results, then normalise so that Trunc is the TRUNCATE user and
// RightShift is the other one.
13383 auto UI
= LD
->use_begin();
13384 while (UI
.getUse().getResNo() != 0) ++UI
;
13385 SDNode
*Trunc
= *UI
++;
13386 while (UI
.getUse().getResNo() != 0) ++UI
;
13387 SDNode
*RightShift
= *UI
;
13388 if (Trunc
->getOpcode() != ISD::TRUNCATE
)
13389 std::swap(Trunc
, RightShift
);
13391 if (Trunc
->getOpcode() != ISD::TRUNCATE
||
13392 Trunc
->getValueType(0) != MVT::i32
||
13393 !Trunc
->hasOneUse())
13395 if (RightShift
->getOpcode() != ISD::SRL
||
13396 !isa
<ConstantSDNode
>(RightShift
->getOperand(1)) ||
13397 RightShift
->getConstantOperandVal(1) != 32 ||
13398 !RightShift
->hasOneUse())
13401 SDNode
*Trunc2
= *RightShift
->use_begin();
13402 if (Trunc2
->getOpcode() != ISD::TRUNCATE
||
13403 Trunc2
->getValueType(0) != MVT::i32
||
13404 !Trunc2
->hasOneUse())
13407 SDNode
*Bitcast
= *Trunc
->use_begin();
13408 SDNode
*Bitcast2
= *Trunc2
->use_begin();
13410 if (Bitcast
->getOpcode() != ISD::BITCAST
||
13411 Bitcast
->getValueType(0) != MVT::f32
)
13413 if (Bitcast2
->getOpcode() != ISD::BITCAST
||
13414 Bitcast2
->getValueType(0) != MVT::f32
)
13417 if (Subtarget
.isLittleEndian())
13418 std::swap(Bitcast
, Bitcast2
);
13420 // Bitcast has the second float (in memory-layout order) and Bitcast2
13421 // has the first one.
13423 SDValue BasePtr
= LD
->getBasePtr();
13424 if (LD
->isIndexed()) {
13425 assert(LD
->getAddressingMode() == ISD::PRE_INC
&&
13426 "Non-pre-inc AM on PPC?");
13428 DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
// The replacement loads keep the original memory-operand flags except for
// MOVolatile, which is masked off.
13433 LD
->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile
;
13434 SDValue FloatLoad
= DAG
.getLoad(MVT::f32
, dl
, LD
->getChain(), BasePtr
,
13435 LD
->getPointerInfo(), LD
->getAlignment(),
13436 MMOFlags
, LD
->getAAInfo());
13438 DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(),
13439 BasePtr
, DAG
.getIntPtrConstant(4, dl
));
// The second f32 load is chained after the first and reads 4 bytes further.
13440 SDValue FloatLoad2
= DAG
.getLoad(
13441 MVT::f32
, dl
, SDValue(FloatLoad
.getNode(), 1), AddPtr
,
13442 LD
->getPointerInfo().getWithOffset(4),
13443 MinAlign(LD
->getAlignment(), 4), MMOFlags
, LD
->getAAInfo());
13445 if (LD
->isIndexed()) {
13446 // Note that DAGCombine should re-form any pre-increment load(s) from
13447 // what is produced here if that makes sense.
13448 DAG
.ReplaceAllUsesOfValueWith(SDValue(LD
, 1), BasePtr
);
13451 DCI
.CombineTo(Bitcast2
, FloatLoad
);
13452 DCI
.CombineTo(Bitcast
, FloatLoad2
);
// Redirect users of the original load's chain result (result #2 for an
// indexed load, #1 otherwise) to the chain of the second float load.
13454 DAG
.ReplaceAllUsesOfValueWith(SDValue(LD
, LD
->isIndexed() ? 2 : 1),
13455 SDValue(FloatLoad2
.getNode(), 1));
13459 if (ReplaceTwoFloatLoad())
13460 return SDValue(N
, 0);
13462 EVT MemVT
= LD
->getMemoryVT();
13463 Type
*Ty
= MemVT
.getTypeForEVT(*DAG
.getContext());
13464 unsigned ABIAlignment
= DAG
.getDataLayout().getABITypeAlignment(Ty
);
13465 Type
*STy
= MemVT
.getScalarType().getTypeForEVT(*DAG
.getContext());
13466 unsigned ScalarABIAlignment
= DAG
.getDataLayout().getABITypeAlignment(STy
);
13467 if (LD
->isUnindexed() && VT
.isVector() &&
13468 ((Subtarget
.hasAltivec() && ISD::isNON_EXTLoad(N
) &&
13469 // P8 and later hardware should just use LOAD.
13470 !Subtarget
.hasP8Vector() && (VT
== MVT::v16i8
|| VT
== MVT::v8i16
||
13471 VT
== MVT::v4i32
|| VT
== MVT::v4f32
)) ||
13472 (Subtarget
.hasQPX() && (VT
== MVT::v4f64
|| VT
== MVT::v4f32
) &&
13473 LD
->getAlignment() >= ScalarABIAlignment
)) &&
13474 LD
->getAlignment() < ABIAlignment
) {
13475 // This is a type-legal unaligned Altivec or QPX load.
13476 SDValue Chain
= LD
->getChain();
13477 SDValue Ptr
= LD
->getBasePtr();
13478 bool isLittleEndian
= Subtarget
.isLittleEndian();
13480 // This implements the loading of unaligned vectors as described in
13481 // the venerable Apple Velocity Engine overview. Specifically:
13482 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
13483 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
13485 // The general idea is to expand a sequence of one or more unaligned
13486 // loads into an alignment-based permutation-control instruction (lvsl
13487 // or lvsr), a series of regular vector loads (which always truncate
13488 // their input address to an aligned address), and a series of
13489 // permutations. The results of these permutations are the requested
13490 // loaded values. The trick is that the last "extra" load is not taken
13491 // from the address you might suspect (sizeof(vector) bytes after the
13492 // last requested load), but rather sizeof(vector) - 1 bytes after the
13493 // last requested vector. The point of this is to avoid a page fault if
13494 // the base address happened to be aligned. This works because if the
13495 // base address is aligned, then adding less than a full vector length
13496 // will cause the last vector in the sequence to be (re)loaded.
13497 // Otherwise, the next vector will be fetched as you might suspect was
// necessary.
13500 // We might be able to reuse the permutation generation from
13501 // a different base address offset from this one by an aligned amount.
13502 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
13503 // optimization later.
13504 Intrinsic::ID Intr
, IntrLD
, IntrPerm
;
13505 MVT PermCntlTy
, PermTy
, LDTy
;
13506 if (Subtarget
.hasAltivec()) {
13507 Intr
= isLittleEndian
? Intrinsic::ppc_altivec_lvsr
:
13508 Intrinsic::ppc_altivec_lvsl
;
13509 IntrLD
= Intrinsic::ppc_altivec_lvx
;
13510 IntrPerm
= Intrinsic::ppc_altivec_vperm
;
13511 PermCntlTy
= MVT::v16i8
;
13512 PermTy
= MVT::v4i32
;
13515 Intr
= MemVT
== MVT::v4f64
? Intrinsic::ppc_qpx_qvlpcld
:
13516 Intrinsic::ppc_qpx_qvlpcls
;
13517 IntrLD
= MemVT
== MVT::v4f64
? Intrinsic::ppc_qpx_qvlfd
:
13518 Intrinsic::ppc_qpx_qvlfs
;
13519 IntrPerm
= Intrinsic::ppc_qpx_qvfperm
;
13520 PermCntlTy
= MVT::v4f64
;
13521 PermTy
= MVT::v4f64
;
13522 LDTy
= MemVT
.getSimpleVT();
13525 SDValue PermCntl
= BuildIntrinsicOp(Intr
, Ptr
, DAG
, dl
, PermCntlTy
);
13527 // Create the new MMO for the new base load. It is like the original MMO,
13528 // but represents an area in memory almost twice the vector size centered
13529 // on the original address. If the address is unaligned, we might start
13530 // reading up to (sizeof(vector)-1) bytes below the address of the
13531 // original unaligned load.
13532 MachineFunction
&MF
= DAG
.getMachineFunction();
13533 MachineMemOperand
*BaseMMO
=
13534 MF
.getMachineMemOperand(LD
->getMemOperand(),
13535 -(long)MemVT
.getStoreSize()+1,
13536 2*MemVT
.getStoreSize()-1);
13538 // Create the new base load.
13540 DAG
.getTargetConstant(IntrLD
, dl
, getPointerTy(MF
.getDataLayout()));
13541 SDValue BaseLoadOps
[] = { Chain
, LDXIntID
, Ptr
};
13543 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, dl
,
13544 DAG
.getVTList(PermTy
, MVT::Other
),
13545 BaseLoadOps
, LDTy
, BaseMMO
);
13547 // Note that the value of IncOffset (which is provided to the next
13548 // load's pointer info offset value, and thus used to calculate the
13549 // alignment), and the value of IncValue (which is actually used to
13550 // increment the pointer value) are different! This is because we
13551 // require the next load to appear to be aligned, even though it
13552 // is actually offset from the base pointer by a lesser amount.
13553 int IncOffset
= VT
.getSizeInBits() / 8;
13554 int IncValue
= IncOffset
;
13556 // Walk (both up and down) the chain looking for another load at the real
13557 // (aligned) offset (the alignment of the other load does not matter in
13558 // this case). If found, then do not use the offset reduction trick, as
13559 // that will prevent the loads from being later combined (as they would
13560 // otherwise be duplicates).
13561 if (!findConsecutiveLoad(LD
, DAG
))
13564 SDValue Increment
=
13565 DAG
.getConstant(IncValue
, dl
, getPointerTy(MF
.getDataLayout()));
13566 Ptr
= DAG
.getNode(ISD::ADD
, dl
, Ptr
.getValueType(), Ptr
, Increment
);
13568 MachineMemOperand
*ExtraMMO
=
13569 MF
.getMachineMemOperand(LD
->getMemOperand(),
13570 1, 2*MemVT
.getStoreSize()-1);
13571 SDValue ExtraLoadOps
[] = { Chain
, LDXIntID
, Ptr
};
13572 SDValue ExtraLoad
=
13573 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, dl
,
13574 DAG
.getVTList(PermTy
, MVT::Other
),
13575 ExtraLoadOps
, LDTy
, ExtraMMO
);
// Merge the chains of both loads so that later users depend on both.
13577 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
13578 BaseLoad
.getValue(1), ExtraLoad
.getValue(1));
13580 // Because vperm has a big-endian bias, we must reverse the order
13581 // of the input vectors and complement the permute control vector
13582 // when generating little endian code. We have already handled the
13583 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13584 // and ExtraLoad here.
13586 if (isLittleEndian
)
13587 Perm
= BuildIntrinsicOp(IntrPerm
,
13588 ExtraLoad
, BaseLoad
, PermCntl
, DAG
, dl
);
13590 Perm
= BuildIntrinsicOp(IntrPerm
,
13591 BaseLoad
, ExtraLoad
, PermCntl
, DAG
, dl
);
13594 Perm
= Subtarget
.hasAltivec() ?
13595 DAG
.getNode(ISD::BITCAST
, dl
, VT
, Perm
) :
13596 DAG
.getNode(ISD::FP_ROUND
, dl
, VT
, Perm
, // QPX
13597 DAG
.getTargetConstant(1, dl
, MVT::i64
));
13598 // second argument is 1 because this rounding
13599 // is always exact.
13601 // The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
13603 DCI
.CombineTo(N
, Perm
, TF
);
13604 return SDValue(N
, 0);
// Reuse an existing lvsl/lvsr (or QPX qvlpcld/qvlpcls) whose address differs
// from this one only by a multiple of the vector alignment.
13608 case ISD::INTRINSIC_WO_CHAIN
: {
13609 bool isLittleEndian
= Subtarget
.isLittleEndian();
13610 unsigned IID
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
13611 Intrinsic::ID Intr
= (isLittleEndian
? Intrinsic::ppc_altivec_lvsr
13612 : Intrinsic::ppc_altivec_lvsl
);
13613 if ((IID
== Intr
||
13614 IID
== Intrinsic::ppc_qpx_qvlpcld
||
13615 IID
== Intrinsic::ppc_qpx_qvlpcls
) &&
13616 N
->getOperand(1)->getOpcode() == ISD::ADD
) {
13617 SDValue Add
= N
->getOperand(1);
13619 int Bits
= IID
== Intrinsic::ppc_qpx_qvlpcld
?
13620 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
// Case 1: the added offset has its low `Bits` bits known to be zero, so the
// same intrinsic applied directly to the base pointer gives the same result.
13622 if (DAG
.MaskedValueIsZero(Add
->getOperand(1),
13623 APInt::getAllOnesValue(Bits
/* alignment */)
13624 .zext(Add
.getScalarValueSizeInBits()))) {
13625 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13626 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13627 UE
= BasePtr
->use_end();
13629 if (UI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13630 cast
<ConstantSDNode
>(UI
->getOperand(0))->getZExtValue() == IID
) {
13631 // We've found another LVSL/LVSR, and this address is an aligned
13632 // multiple of that one. The results will be the same, so use the
13633 // one we've just found instead.
13635 return SDValue(*UI
, 0);
// Case 2: constant offset. Look for another (base + constant) add whose
// constant differs from ours by a multiple of 2^Bits and that already feeds
// the same intrinsic.
13640 if (isa
<ConstantSDNode
>(Add
->getOperand(1))) {
13641 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13642 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13643 UE
= BasePtr
->use_end(); UI
!= UE
; ++UI
) {
13644 if (UI
->getOpcode() == ISD::ADD
&&
13645 isa
<ConstantSDNode
>(UI
->getOperand(1)) &&
13646 (cast
<ConstantSDNode
>(Add
->getOperand(1))->getZExtValue() -
13647 cast
<ConstantSDNode
>(UI
->getOperand(1))->getZExtValue()) %
13648 (1ULL << Bits
) == 0) {
13649 SDNode
*OtherAdd
= *UI
;
13650 for (SDNode::use_iterator VI
= OtherAdd
->use_begin(),
13651 VE
= OtherAdd
->use_end(); VI
!= VE
; ++VI
) {
13652 if (VI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13653 cast
<ConstantSDNode
>(VI
->getOperand(0))->getZExtValue() == IID
) {
13654 return SDValue(*VI
, 0);
13662 // Combine vmaxsw/h/b(a, a's negation) to abs(a).
13663 // This exposes the vabsduw/h/b opportunity downstream.
13664 if (!DCI
.isAfterLegalizeDAG() && Subtarget
.hasP9Altivec() &&
13665 (IID
== Intrinsic::ppc_altivec_vmaxsw
||
13666 IID
== Intrinsic::ppc_altivec_vmaxsh
||
13667 IID
== Intrinsic::ppc_altivec_vmaxsb
)) {
13668 SDValue V1
= N
->getOperand(1);
13669 SDValue V2
= N
->getOperand(2);
13670 if ((V1
.getSimpleValueType() == MVT::v4i32
||
13671 V1
.getSimpleValueType() == MVT::v8i16
||
13672 V1
.getSimpleValueType() == MVT::v16i8
) &&
13673 V1
.getSimpleValueType() == V2
.getSimpleValueType()) {
// max(0 - a, a) -> abs(a)
13675 if (V1
.getOpcode() == ISD::SUB
&&
13676 ISD::isBuildVectorAllZeros(V1
.getOperand(0).getNode()) &&
13677 V1
.getOperand(1) == V2
) {
13678 return DAG
.getNode(ISD::ABS
, dl
, V2
.getValueType(), V2
);
// max(a, 0 - a) -> abs(a)
13681 if (V2
.getOpcode() == ISD::SUB
&&
13682 ISD::isBuildVectorAllZeros(V2
.getOperand(0).getNode()) &&
13683 V2
.getOperand(1) == V1
) {
13684 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
// max(a - b, b - a) -> abs(a - b)
13687 if (V1
.getOpcode() == ISD::SUB
&& V2
.getOpcode() == ISD::SUB
&&
13688 V1
.getOperand(0) == V2
.getOperand(1) &&
13689 V1
.getOperand(1) == V2
.getOperand(0)) {
13690 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
13697 case ISD::INTRINSIC_W_CHAIN
:
13698 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13699 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13700 if (Subtarget
.needsSwapsForVSXMemOps()) {
13701 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13704 case Intrinsic::ppc_vsx_lxvw4x
:
13705 case Intrinsic::ppc_vsx_lxvd2x
:
13706 return expandVSXLoadForLE(N
, DCI
);
13710 case ISD::INTRINSIC_VOID
:
13711 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13712 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13713 if (Subtarget
.needsSwapsForVSXMemOps()) {
13714 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13717 case Intrinsic::ppc_vsx_stxvw4x
:
13718 case Intrinsic::ppc_vsx_stxvd2x
:
13719 return expandVSXStoreForLE(N
, DCI
);
13724 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13725 if (ISD::isNON_EXTLoad(N
->getOperand(0).getNode()) &&
13726 N
->getOperand(0).hasOneUse() &&
13727 (N
->getValueType(0) == MVT::i32
|| N
->getValueType(0) == MVT::i16
||
13728 (Subtarget
.hasLDBRX() && Subtarget
.isPPC64() &&
13729 N
->getValueType(0) == MVT::i64
))) {
13730 SDValue Load
= N
->getOperand(0);
13731 LoadSDNode
*LD
= cast
<LoadSDNode
>(Load
);
13732 // Create the byte-swapping load.
13734 LD
->getChain(), // Chain
13735 LD
->getBasePtr(), // Ptr
13736 DAG
.getValueType(N
->getValueType(0)) // VT
// The LBRX node produces i64 for an i64 bswap and i32 otherwise (including
// the i16 case, which is truncated below).
13739 DAG
.getMemIntrinsicNode(PPCISD::LBRX
, dl
,
13740 DAG
.getVTList(N
->getValueType(0) == MVT::i64
?
13741 MVT::i64
: MVT::i32
, MVT::Other
),
13742 Ops
, LD
->getMemoryVT(), LD
->getMemOperand());
13744 // If this is an i16 load, insert the truncate.
13745 SDValue ResVal
= BSLoad
;
13746 if (N
->getValueType(0) == MVT::i16
)
13747 ResVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i16
, BSLoad
);
13749 // First, combine the bswap away. This makes the value produced by the
// load dead.
13751 DCI
.CombineTo(N
, ResVal
);
13753 // Next, combine the load away, we give it a bogus result value but a real
13754 // chain result. The result value is dead because the bswap is dead.
13755 DCI
.CombineTo(Load
.getNode(), ResVal
, BSLoad
.getValue(1));
13757 // Return N so it doesn't get rechecked!
13758 return SDValue(N
, 0);
13762 // If a VCMPo node already exists with exactly the same operands as this
13763 // node, use its result instead of this node (VCMPo computes both a CR6 and
13764 // a normal output).
13766 if (!N
->getOperand(0).hasOneUse() &&
13767 !N
->getOperand(1).hasOneUse() &&
13768 !N
->getOperand(2).hasOneUse()) {
13770 // Scan all of the users of the LHS, looking for VCMPo's that match.
13771 SDNode
*VCMPoNode
= nullptr;
13773 SDNode
*LHSN
= N
->getOperand(0).getNode();
13774 for (SDNode::use_iterator UI
= LHSN
->use_begin(), E
= LHSN
->use_end();
13776 if (UI
->getOpcode() == PPCISD::VCMPo
&&
13777 UI
->getOperand(1) == N
->getOperand(1) &&
13778 UI
->getOperand(2) == N
->getOperand(2) &&
13779 UI
->getOperand(0) == N
->getOperand(0)) {
13784 // If there is no VCMPo node, or if its flag value (result #1) has no uses,
// don't transform this.
13786 if (!VCMPoNode
|| VCMPoNode
->hasNUsesOfValue(0, 1))
13789 // Look at the (necessarily single) use of the flag value. If it has a
13790 // chain, this transformation is more complex. Note that multiple things
13791 // could use the value result, which we should ignore.
13792 SDNode
*FlagUser
= nullptr;
13793 for (SDNode::use_iterator UI
= VCMPoNode
->use_begin();
13794 FlagUser
== nullptr; ++UI
) {
13795 assert(UI
!= VCMPoNode
->use_end() && "Didn't find user!");
13796 SDNode
*User
= *UI
;
13797 for (unsigned i
= 0, e
= User
->getNumOperands(); i
!= e
; ++i
) {
13798 if (User
->getOperand(i
) == SDValue(VCMPoNode
, 1)) {
13805 // If the user is a MFOCRF instruction, we know this is safe.
13806 // Otherwise we give up for right now.
13807 if (FlagUser
->getOpcode() == PPCISD::MFOCRF
)
13808 return SDValue(VCMPoNode
, 0);
// A BRCOND on the loop_decrement intrinsic becomes a PPCISD::BDNZ branch.
13811 case ISD::BRCOND
: {
13812 SDValue Cond
= N
->getOperand(1);
13813 SDValue Target
= N
->getOperand(2);
13815 if (Cond
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13816 cast
<ConstantSDNode
>(Cond
.getOperand(1))->getZExtValue() ==
13817 Intrinsic::loop_decrement
) {
13819 // We now need to make the intrinsic dead (it cannot be instruction
// selected). Users of its result #1 are redirected to its operand #0.
13821 DAG
.ReplaceAllUsesOfValueWith(Cond
.getValue(1), Cond
.getOperand(0));
13822 assert(Cond
.getNode()->hasOneUse() &&
13823 "Counter decrement has more than one use");
13825 return DAG
.getNode(PPCISD::BDNZ
, dl
, MVT::Other
,
13826 N
->getOperand(0), Target
);
13831 // If this is a branch on an altivec predicate comparison, lower this so
13832 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
13833 // lowering is done pre-legalize, because the legalizer lowers the predicate
13834 // compare down to code that is difficult to reassemble.
13835 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
13836 SDValue LHS
= N
->getOperand(2), RHS
= N
->getOperand(3);
13838 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
13839 // value. If so, pass-through the AND to get to the intrinsic.
13840 if (LHS
.getOpcode() == ISD::AND
&&
13841 LHS
.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13842 cast
<ConstantSDNode
>(LHS
.getOperand(0).getOperand(1))->getZExtValue() ==
13843 Intrinsic::loop_decrement
&&
13844 isa
<ConstantSDNode
>(LHS
.getOperand(1)) &&
13845 !isNullConstant(LHS
.getOperand(1)))
13846 LHS
= LHS
.getOperand(0);
13848 if (LHS
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13849 cast
<ConstantSDNode
>(LHS
.getOperand(1))->getZExtValue() ==
13850 Intrinsic::loop_decrement
&&
13851 isa
<ConstantSDNode
>(RHS
)) {
13852 assert((CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) &&
13853 "Counter decrement comparison is not EQ or NE");
// BDNZ is chosen for (== non-zero) or (!= zero); BDZ otherwise.
13855 unsigned Val
= cast
<ConstantSDNode
>(RHS
)->getZExtValue();
13856 bool isBDNZ
= (CC
== ISD::SETEQ
&& Val
) ||
13857 (CC
== ISD::SETNE
&& !Val
);
13859 // We now need to make the intrinsic dead (it cannot be instruction
// selected). Users of its result #1 are redirected to its operand #0.
13861 DAG
.ReplaceAllUsesOfValueWith(LHS
.getValue(1), LHS
.getOperand(0));
13862 assert(LHS
.getNode()->hasOneUse() &&
13863 "Counter decrement has more than one use");
13865 return DAG
.getNode(isBDNZ
? PPCISD::BDNZ
: PPCISD::BDZ
, dl
, MVT::Other
,
13866 N
->getOperand(0), N
->getOperand(4));
13872 if (LHS
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13873 isa
<ConstantSDNode
>(RHS
) && (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) &&
13874 getVectorCompareInfo(LHS
, CompareOpc
, isDot
, Subtarget
)) {
13875 assert(isDot
&& "Can't compare against a vector result!");
13877 // If this is a comparison against something other than 0/1, then we know
13878 // that the condition is never/always true.
13879 unsigned Val
= cast
<ConstantSDNode
>(RHS
)->getZExtValue();
13880 if (Val
!= 0 && Val
!= 1) {
13881 if (CC
== ISD::SETEQ
) // Cond never true, remove branch.
13882 return N
->getOperand(0);
13883 // Always !=, turn it into an unconditional branch.
13884 return DAG
.getNode(ISD::BR
, dl
, MVT::Other
,
13885 N
->getOperand(0), N
->getOperand(4));
// True when the branch is taken on a true predicate: (== 1) or (!= 0).
13888 bool BranchOnWhenPredTrue
= (CC
== ISD::SETEQ
) ^ (Val
== 0);
13890 // Create the PPCISD altivec 'dot' comparison node.
13892 LHS
.getOperand(2), // LHS of compare
13893 LHS
.getOperand(3), // RHS of compare
13894 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
)
13896 EVT VTs
[] = { LHS
.getOperand(2).getValueType(), MVT::Glue
};
13897 SDValue CompNode
= DAG
.getNode(PPCISD::VCMPo
, dl
, VTs
, Ops
);
13899 // Unpack the result based on how the target uses it.
13900 PPC::Predicate CompOpc
;
13901 switch (cast
<ConstantSDNode
>(LHS
.getOperand(1))->getZExtValue()) {
13902 default: // Can't happen, don't crash on invalid number though.
13903 case 0: // Branch on the value of the EQ bit of CR6.
13904 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_EQ
: PPC::PRED_NE
;
13906 case 1: // Branch on the inverted value of the EQ bit of CR6.
13907 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_NE
: PPC::PRED_EQ
;
13909 case 2: // Branch on the value of the LT bit of CR6.
13910 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_LT
: PPC::PRED_GE
;
13912 case 3: // Branch on the inverted value of the LT bit of CR6.
13913 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_GE
: PPC::PRED_LT
;
// Branch on CR6 using the glue result (#1) of the compare node.
13917 return DAG
.getNode(PPCISD::COND_BRANCH
, dl
, MVT::Other
, N
->getOperand(0),
13918 DAG
.getConstant(CompOpc
, dl
, MVT::i32
),
13919 DAG
.getRegister(PPC::CR6
, MVT::i32
),
13920 N
->getOperand(4), CompNode
.getValue(1));
13924 case ISD::BUILD_VECTOR
:
13925 return DAGCombineBuildVector(N
, DCI
);
13927 return combineABS(N
, DCI
);
13929 return combineVSelect(N
, DCI
);
// BuildSDIVPow2 - Fold a signed division of N's first operand by Divisor,
// where Divisor is a power of two or the negation of one, into a
// PPCISD::SRA_ADDZE node (followed by a subtraction from zero, see below).
// Only i32 and i64 are handled, and i64 only on 64-bit subtargets. Every node
// created here is appended to Created.
//
// NOTE(review): this listing appears to be missing source lines (the return
// type, the `DAG` parameter, the declaration of `DL`, the statements guarded
// by the early-exit checks and the final return); confirm against the
// complete file.
13936 PPCTargetLowering::BuildSDIVPow2(SDNode
*N
, const APInt
&Divisor
,
13938 SmallVectorImpl
<SDNode
*> &Created
) const {
13939 // fold (sdiv X, pow2)
13940 EVT VT
= N
->getValueType(0);
13941 if (VT
== MVT::i64
&& !Subtarget
.isPPC64())
13943 if ((VT
!= MVT::i32
&& VT
!= MVT::i64
) ||
13944 !(Divisor
.isPowerOf2() || (-Divisor
).isPowerOf2()))
13948 SDValue N0
= N
->getOperand(0);
// The shift amount is log2 of the divisor's magnitude.
13950 bool IsNegPow2
= (-Divisor
).isPowerOf2();
13951 unsigned Lg2
= (IsNegPow2
? -Divisor
: Divisor
).countTrailingZeros();
13952 SDValue ShiftAmt
= DAG
.getConstant(Lg2
, DL
, VT
);
13954 SDValue Op
= DAG
.getNode(PPCISD::SRA_ADDZE
, DL
, VT
, N0
, ShiftAmt
);
13955 Created
.push_back(Op
.getNode());
// Negate the result by computing (0 - Op).
// NOTE(review): presumably this is guarded by IsNegPow2; the guard is not
// visible in this listing - confirm.
13958 Op
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, DL
, VT
), Op
);
13959 Created
.push_back(Op
.getNode());
13965 //===----------------------------------------------------------------------===//
13966 // Inline Assembly Support
13967 //===----------------------------------------------------------------------===//
13969 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op
,
13971 const APInt
&DemandedElts
,
13972 const SelectionDAG
&DAG
,
13973 unsigned Depth
) const {
13975 switch (Op
.getOpcode()) {
13977 case PPCISD::LBRX
: {
13978 // lhbrx is known to have the top bits cleared out.
13979 if (cast
<VTSDNode
>(Op
.getOperand(2))->getVT() == MVT::i16
)
13980 Known
.Zero
= 0xFFFF0000;
13983 case ISD::INTRINSIC_WO_CHAIN
: {
13984 switch (cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue()) {
13986 case Intrinsic::ppc_altivec_vcmpbfp_p
:
13987 case Intrinsic::ppc_altivec_vcmpeqfp_p
:
13988 case Intrinsic::ppc_altivec_vcmpequb_p
:
13989 case Intrinsic::ppc_altivec_vcmpequh_p
:
13990 case Intrinsic::ppc_altivec_vcmpequw_p
:
13991 case Intrinsic::ppc_altivec_vcmpequd_p
:
13992 case Intrinsic::ppc_altivec_vcmpgefp_p
:
13993 case Intrinsic::ppc_altivec_vcmpgtfp_p
:
13994 case Intrinsic::ppc_altivec_vcmpgtsb_p
:
13995 case Intrinsic::ppc_altivec_vcmpgtsh_p
:
13996 case Intrinsic::ppc_altivec_vcmpgtsw_p
:
13997 case Intrinsic::ppc_altivec_vcmpgtsd_p
:
13998 case Intrinsic::ppc_altivec_vcmpgtub_p
:
13999 case Intrinsic::ppc_altivec_vcmpgtuh_p
:
14000 case Intrinsic::ppc_altivec_vcmpgtuw_p
:
14001 case Intrinsic::ppc_altivec_vcmpgtud_p
:
14002 Known
.Zero
= ~1U; // All bits but the low one are known to be zero.
14009 unsigned PPCTargetLowering::getPrefLoopLogAlignment(MachineLoop
*ML
) const {
14010 switch (Subtarget
.getDarwinDirective()) {
14013 case PPC::DIR_PWR4
:
14014 case PPC::DIR_PWR5
:
14015 case PPC::DIR_PWR5X
:
14016 case PPC::DIR_PWR6
:
14017 case PPC::DIR_PWR6X
:
14018 case PPC::DIR_PWR7
:
14019 case PPC::DIR_PWR8
:
14020 case PPC::DIR_PWR9
: {
14024 if (!DisableInnermostLoopAlign32
) {
14025 // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
14026 // so that we can decrease cache misses and branch-prediction misses.
14027 // Actual alignment of the loop will depend on the hotness check and other
14028 // logic in alignBlocks.
14029 if (ML
->getLoopDepth() > 1 && ML
->getSubLoops().empty())
14033 const PPCInstrInfo
*TII
= Subtarget
.getInstrInfo();
14035 // For small loops (between 5 and 8 instructions), align to a 32-byte
14036 // boundary so that the entire loop fits in one instruction-cache line.
14037 uint64_t LoopSize
= 0;
14038 for (auto I
= ML
->block_begin(), IE
= ML
->block_end(); I
!= IE
; ++I
)
14039 for (auto J
= (*I
)->begin(), JE
= (*I
)->end(); J
!= JE
; ++J
) {
14040 LoopSize
+= TII
->getInstSizeInBytes(*J
);
14045 if (LoopSize
> 16 && LoopSize
<= 32)
14052 return TargetLowering::getPrefLoopLogAlignment(ML
);
14055 /// getConstraintType - Given a constraint, return the type of
14056 /// constraint it is for this target.
14057 PPCTargetLowering::ConstraintType
14058 PPCTargetLowering::getConstraintType(StringRef Constraint
) const {
14059 if (Constraint
.size() == 1) {
14060 switch (Constraint
[0]) {
14068 return C_RegisterClass
;
14070 // FIXME: While Z does indicate a memory constraint, it specifically
14071 // indicates an r+r address (used in conjunction with the 'y' modifier
14072 // in the replacement string). Currently, we're forcing the base
14073 // register to be r0 in the asm printer (which is interpreted as zero)
14074 // and forming the complete address in the second register. This is
14078 } else if (Constraint
== "wc") { // individual CR bits.
14079 return C_RegisterClass
;
14080 } else if (Constraint
== "wa" || Constraint
== "wd" ||
14081 Constraint
== "wf" || Constraint
== "ws" ||
14082 Constraint
== "wi" || Constraint
== "ww") {
14083 return C_RegisterClass
; // VSX registers.
14085 return TargetLowering::getConstraintType(Constraint
);
14088 /// Examine constraint type and operand type and determine a weight value.
14089 /// This object must already have been set up with the operand type
14090 /// and the current alternative constraint selected.
14091 TargetLowering::ConstraintWeight
14092 PPCTargetLowering::getSingleConstraintMatchWeight(
14093 AsmOperandInfo
&info
, const char *constraint
) const {
14094 ConstraintWeight weight
= CW_Invalid
;
14095 Value
*CallOperandVal
= info
.CallOperandVal
;
14096 // If we don't have a value, we can't do a match,
14097 // but allow it at the lowest weight.
14098 if (!CallOperandVal
)
14100 Type
*type
= CallOperandVal
->getType();
14102 // Look at the constraint type.
14103 if (StringRef(constraint
) == "wc" && type
->isIntegerTy(1))
14104 return CW_Register
; // an individual CR bit.
14105 else if ((StringRef(constraint
) == "wa" ||
14106 StringRef(constraint
) == "wd" ||
14107 StringRef(constraint
) == "wf") &&
14108 type
->isVectorTy())
14109 return CW_Register
;
14110 else if (StringRef(constraint
) == "wi" && type
->isIntegerTy(64))
14111 return CW_Register
; // just hold 64-bit integers data.
14112 else if (StringRef(constraint
) == "ws" && type
->isDoubleTy())
14113 return CW_Register
;
14114 else if (StringRef(constraint
) == "ww" && type
->isFloatTy())
14115 return CW_Register
;
14117 switch (*constraint
) {
14119 weight
= TargetLowering::getSingleConstraintMatchWeight(info
, constraint
);
14122 if (type
->isIntegerTy())
14123 weight
= CW_Register
;
14126 if (type
->isFloatTy())
14127 weight
= CW_Register
;
14130 if (type
->isDoubleTy())
14131 weight
= CW_Register
;
14134 if (type
->isVectorTy())
14135 weight
= CW_Register
;
14138 weight
= CW_Register
;
14141 weight
= CW_Memory
;
14147 std::pair
<unsigned, const TargetRegisterClass
*>
14148 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
14149 StringRef Constraint
,
14151 if (Constraint
.size() == 1) {
14152 // GCC RS6000 Constraint Letters
14153 switch (Constraint
[0]) {
14154 case 'b': // R1-R31
14155 if (VT
== MVT::i64
&& Subtarget
.isPPC64())
14156 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass
);
14157 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass
);
14158 case 'r': // R0-R31
14159 if (VT
== MVT::i64
&& Subtarget
.isPPC64())
14160 return std::make_pair(0U, &PPC::G8RCRegClass
);
14161 return std::make_pair(0U, &PPC::GPRCRegClass
);
14162 // 'd' and 'f' constraints are both defined to be "the floating point
14163 // registers", where one is for 32-bit and the other for 64-bit. We don't
14164 // really care overly much here so just give them all the same reg classes.
14167 if (Subtarget
.hasSPE()) {
14168 if (VT
== MVT::f32
|| VT
== MVT::i32
)
14169 return std::make_pair(0U, &PPC::SPE4RCRegClass
);
14170 if (VT
== MVT::f64
|| VT
== MVT::i64
)
14171 return std::make_pair(0U, &PPC::SPERCRegClass
);
14173 if (VT
== MVT::f32
|| VT
== MVT::i32
)
14174 return std::make_pair(0U, &PPC::F4RCRegClass
);
14175 if (VT
== MVT::f64
|| VT
== MVT::i64
)
14176 return std::make_pair(0U, &PPC::F8RCRegClass
);
14177 if (VT
== MVT::v4f64
&& Subtarget
.hasQPX())
14178 return std::make_pair(0U, &PPC::QFRCRegClass
);
14179 if (VT
== MVT::v4f32
&& Subtarget
.hasQPX())
14180 return std::make_pair(0U, &PPC::QSRCRegClass
);
14184 if (VT
== MVT::v4f64
&& Subtarget
.hasQPX())
14185 return std::make_pair(0U, &PPC::QFRCRegClass
);
14186 if (VT
== MVT::v4f32
&& Subtarget
.hasQPX())
14187 return std::make_pair(0U, &PPC::QSRCRegClass
);
14188 if (Subtarget
.hasAltivec())
14189 return std::make_pair(0U, &PPC::VRRCRegClass
);
14192 return std::make_pair(0U, &PPC::CRRCRegClass
);
14194 } else if (Constraint
== "wc" && Subtarget
.useCRBits()) {
14195 // An individual CR bit.
14196 return std::make_pair(0U, &PPC::CRBITRCRegClass
);
14197 } else if ((Constraint
== "wa" || Constraint
== "wd" ||
14198 Constraint
== "wf" || Constraint
== "wi") &&
14199 Subtarget
.hasVSX()) {
14200 return std::make_pair(0U, &PPC::VSRCRegClass
);
14201 } else if ((Constraint
== "ws" || Constraint
== "ww") && Subtarget
.hasVSX()) {
14202 if (VT
== MVT::f32
&& Subtarget
.hasP8Vector())
14203 return std::make_pair(0U, &PPC::VSSRCRegClass
);
14205 return std::make_pair(0U, &PPC::VSFRCRegClass
);
14208 std::pair
<unsigned, const TargetRegisterClass
*> R
=
14209 TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
14211 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14212 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14213 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14215 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14216 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14217 if (R
.first
&& VT
== MVT::i64
&& Subtarget
.isPPC64() &&
14218 PPC::GPRCRegClass
.contains(R
.first
))
14219 return std::make_pair(TRI
->getMatchingSuperReg(R
.first
,
14220 PPC::sub_32
, &PPC::G8RCRegClass
),
14221 &PPC::G8RCRegClass
);
14223 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14224 if (!R
.second
&& StringRef("{cc}").equals_lower(Constraint
)) {
14225 R
.first
= PPC::CR0
;
14226 R
.second
= &PPC::CRRCRegClass
;
14232 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14233 /// vector. If it is invalid, don't add anything to Ops.
14234 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op
,
14235 std::string
&Constraint
,
14236 std::vector
<SDValue
>&Ops
,
14237 SelectionDAG
&DAG
) const {
14240 // Only support length 1 constraints.
14241 if (Constraint
.length() > 1) return;
14243 char Letter
= Constraint
[0];
14254 ConstantSDNode
*CST
= dyn_cast
<ConstantSDNode
>(Op
);
14255 if (!CST
) return; // Must be an immediate to match.
14257 int64_t Value
= CST
->getSExtValue();
14258 EVT TCVT
= MVT::i64
; // All constants taken to be 64 bits so that negative
14259 // numbers are printed as such.
14261 default: llvm_unreachable("Unknown constraint letter!");
14262 case 'I': // "I" is a signed 16-bit constant.
14263 if (isInt
<16>(Value
))
14264 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14266 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
14267 if (isShiftedUInt
<16, 16>(Value
))
14268 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14270 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
14271 if (isShiftedInt
<16, 16>(Value
))
14272 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14274 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
14275 if (isUInt
<16>(Value
))
14276 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14278 case 'M': // "M" is a constant that is greater than 31.
14280 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14282 case 'N': // "N" is a positive constant that is an exact power of two.
14283 if (Value
> 0 && isPowerOf2_64(Value
))
14284 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14286 case 'O': // "O" is the constant zero.
14288 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14290 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
14291 if (isInt
<16>(-Value
))
14292 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14299 if (Result
.getNode()) {
14300 Ops
.push_back(Result
);
14304 // Handle standard constraint letters.
14305 TargetLowering::LowerAsmOperandForConstraint(Op
, Constraint
, Ops
, DAG
);
14308 // isLegalAddressingMode - Return true if the addressing mode represented
14309 // by AM is legal for this target, for a load/store of the specified type.
14310 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
14311 const AddrMode
&AM
, Type
*Ty
,
14312 unsigned AS
, Instruction
*I
) const {
14313 // PPC does not allow r+i addressing modes for vectors!
14314 if (Ty
->isVectorTy() && AM
.BaseOffs
!= 0)
14317 // PPC allows a sign-extended 16-bit immediate field.
14318 if (AM
.BaseOffs
<= -(1LL << 16) || AM
.BaseOffs
>= (1LL << 16)-1)
14321 // No global is ever allowed as a base.
14325 // PPC only support r+r,
14326 switch (AM
.Scale
) {
14327 case 0: // "r+i" or just "i", depending on HasBaseReg.
14330 if (AM
.HasBaseReg
&& AM
.BaseOffs
) // "r+r+i" is not allowed.
14332 // Otherwise we have r+r or r+i.
14335 if (AM
.HasBaseReg
|| AM
.BaseOffs
) // 2*r+r or 2*r+i is not allowed.
14337 // Allow 2*r as r+r.
14340 // No other scales are supported.
14347 SDValue
PPCTargetLowering::LowerRETURNADDR(SDValue Op
,
14348 SelectionDAG
&DAG
) const {
14349 MachineFunction
&MF
= DAG
.getMachineFunction();
14350 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
14351 MFI
.setReturnAddressIsTaken(true);
14353 if (verifyReturnAddressArgumentIsConstant(Op
, DAG
))
14357 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
14359 // Make sure the function does not optimize away the store of the RA to
14361 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
14362 FuncInfo
->setLRStoreRequired();
14363 bool isPPC64
= Subtarget
.isPPC64();
14364 auto PtrVT
= getPointerTy(MF
.getDataLayout());
14367 SDValue FrameAddr
= LowerFRAMEADDR(Op
, DAG
);
14369 DAG
.getConstant(Subtarget
.getFrameLowering()->getReturnSaveOffset(), dl
,
14370 isPPC64
? MVT::i64
: MVT::i32
);
14371 return DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(),
14372 DAG
.getNode(ISD::ADD
, dl
, PtrVT
, FrameAddr
, Offset
),
14373 MachinePointerInfo());
14376 // Just load the return address off the stack.
14377 SDValue RetAddrFI
= getReturnAddrFrameIndex(DAG
);
14378 return DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(), RetAddrFI
,
14379 MachinePointerInfo());
14382 SDValue
PPCTargetLowering::LowerFRAMEADDR(SDValue Op
,
14383 SelectionDAG
&DAG
) const {
14385 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
14387 MachineFunction
&MF
= DAG
.getMachineFunction();
14388 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
14389 MFI
.setFrameAddressIsTaken(true);
14391 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
14392 bool isPPC64
= PtrVT
== MVT::i64
;
14394 // Naked functions never have a frame pointer, and so we use r1. For all
14395 // other functions, this decision must be delayed until during PEI.
14397 if (MF
.getFunction().hasFnAttribute(Attribute::Naked
))
14398 FrameReg
= isPPC64
? PPC::X1
: PPC::R1
;
14400 FrameReg
= isPPC64
? PPC::FP8
: PPC::FP
;
14402 SDValue FrameAddr
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, FrameReg
,
14405 FrameAddr
= DAG
.getLoad(Op
.getValueType(), dl
, DAG
.getEntryNode(),
14406 FrameAddr
, MachinePointerInfo());
14410 // FIXME? Maybe this could be a TableGen attribute on some registers and
14411 // this table could be generated automatically from RegInfo.
14412 unsigned PPCTargetLowering::getRegisterByName(const char* RegName
, EVT VT
,
14413 SelectionDAG
&DAG
) const {
14414 bool isPPC64
= Subtarget
.isPPC64();
14415 bool isDarwinABI
= Subtarget
.isDarwinABI();
14417 if ((isPPC64
&& VT
!= MVT::i64
&& VT
!= MVT::i32
) ||
14418 (!isPPC64
&& VT
!= MVT::i32
))
14419 report_fatal_error("Invalid register global variable type");
14421 bool is64Bit
= isPPC64
&& VT
== MVT::i64
;
14422 unsigned Reg
= StringSwitch
<unsigned>(RegName
)
14423 .Case("r1", is64Bit
? PPC::X1
: PPC::R1
)
14424 .Case("r2", (isDarwinABI
|| isPPC64
) ? 0 : PPC::R2
)
14425 .Case("r13", (!isPPC64
&& isDarwinABI
) ? 0 :
14426 (is64Bit
? PPC::X13
: PPC::R13
))
14431 report_fatal_error("Invalid register name global variable");
14434 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA
) const {
14435 // 32-bit SVR4 ABI access everything as got-indirect.
14436 if (Subtarget
.is32BitELFABI())
14439 // AIX accesses everything indirectly through the TOC, which is similar to
14441 if (Subtarget
.isAIXABI())
14444 CodeModel::Model CModel
= getTargetMachine().getCodeModel();
14445 // If it is small or large code model, module locals are accessed
14446 // indirectly by loading their address from .toc/.got.
14447 if (CModel
== CodeModel::Small
|| CModel
== CodeModel::Large
)
14450 // JumpTable and BlockAddress are accessed as got-indirect.
14451 if (isa
<JumpTableSDNode
>(GA
) || isa
<BlockAddressSDNode
>(GA
))
14454 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(GA
)) {
14455 const GlobalValue
*GV
= G
->getGlobal();
14456 unsigned char GVFlags
= Subtarget
.classifyGlobalReference(GV
);
14457 // The NLP flag indicates that a global access has to use an
14458 // extra indirection.
14459 if (GVFlags
& PPCII::MO_NLP_FLAG
)
14467 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode
*GA
) const {
14468 // The PowerPC target isn't yet aware of offsets.
14472 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
14474 MachineFunction
&MF
,
14475 unsigned Intrinsic
) const {
14476 switch (Intrinsic
) {
14477 case Intrinsic::ppc_qpx_qvlfd
:
14478 case Intrinsic::ppc_qpx_qvlfs
:
14479 case Intrinsic::ppc_qpx_qvlfcd
:
14480 case Intrinsic::ppc_qpx_qvlfcs
:
14481 case Intrinsic::ppc_qpx_qvlfiwa
:
14482 case Intrinsic::ppc_qpx_qvlfiwz
:
14483 case Intrinsic::ppc_altivec_lvx
:
14484 case Intrinsic::ppc_altivec_lvxl
:
14485 case Intrinsic::ppc_altivec_lvebx
:
14486 case Intrinsic::ppc_altivec_lvehx
:
14487 case Intrinsic::ppc_altivec_lvewx
:
14488 case Intrinsic::ppc_vsx_lxvd2x
:
14489 case Intrinsic::ppc_vsx_lxvw4x
: {
14491 switch (Intrinsic
) {
14492 case Intrinsic::ppc_altivec_lvebx
:
14495 case Intrinsic::ppc_altivec_lvehx
:
14498 case Intrinsic::ppc_altivec_lvewx
:
14501 case Intrinsic::ppc_vsx_lxvd2x
:
14504 case Intrinsic::ppc_qpx_qvlfd
:
14507 case Intrinsic::ppc_qpx_qvlfs
:
14510 case Intrinsic::ppc_qpx_qvlfcd
:
14513 case Intrinsic::ppc_qpx_qvlfcs
:
14521 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
14523 Info
.ptrVal
= I
.getArgOperand(0);
14524 Info
.offset
= -VT
.getStoreSize()+1;
14525 Info
.size
= 2*VT
.getStoreSize()-1;
14526 Info
.align
= Align(1);
14527 Info
.flags
= MachineMemOperand::MOLoad
;
14530 case Intrinsic::ppc_qpx_qvlfda
:
14531 case Intrinsic::ppc_qpx_qvlfsa
:
14532 case Intrinsic::ppc_qpx_qvlfcda
:
14533 case Intrinsic::ppc_qpx_qvlfcsa
:
14534 case Intrinsic::ppc_qpx_qvlfiwaa
:
14535 case Intrinsic::ppc_qpx_qvlfiwza
: {
14537 switch (Intrinsic
) {
14538 case Intrinsic::ppc_qpx_qvlfda
:
14541 case Intrinsic::ppc_qpx_qvlfsa
:
14544 case Intrinsic::ppc_qpx_qvlfcda
:
14547 case Intrinsic::ppc_qpx_qvlfcsa
:
14555 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
14557 Info
.ptrVal
= I
.getArgOperand(0);
14559 Info
.size
= VT
.getStoreSize();
14560 Info
.align
= Align(1);
14561 Info
.flags
= MachineMemOperand::MOLoad
;
14564 case Intrinsic::ppc_qpx_qvstfd
:
14565 case Intrinsic::ppc_qpx_qvstfs
:
14566 case Intrinsic::ppc_qpx_qvstfcd
:
14567 case Intrinsic::ppc_qpx_qvstfcs
:
14568 case Intrinsic::ppc_qpx_qvstfiw
:
14569 case Intrinsic::ppc_altivec_stvx
:
14570 case Intrinsic::ppc_altivec_stvxl
:
14571 case Intrinsic::ppc_altivec_stvebx
:
14572 case Intrinsic::ppc_altivec_stvehx
:
14573 case Intrinsic::ppc_altivec_stvewx
:
14574 case Intrinsic::ppc_vsx_stxvd2x
:
14575 case Intrinsic::ppc_vsx_stxvw4x
: {
14577 switch (Intrinsic
) {
14578 case Intrinsic::ppc_altivec_stvebx
:
14581 case Intrinsic::ppc_altivec_stvehx
:
14584 case Intrinsic::ppc_altivec_stvewx
:
14587 case Intrinsic::ppc_vsx_stxvd2x
:
14590 case Intrinsic::ppc_qpx_qvstfd
:
14593 case Intrinsic::ppc_qpx_qvstfs
:
14596 case Intrinsic::ppc_qpx_qvstfcd
:
14599 case Intrinsic::ppc_qpx_qvstfcs
:
14607 Info
.opc
= ISD::INTRINSIC_VOID
;
14609 Info
.ptrVal
= I
.getArgOperand(1);
14610 Info
.offset
= -VT
.getStoreSize()+1;
14611 Info
.size
= 2*VT
.getStoreSize()-1;
14612 Info
.align
= Align(1);
14613 Info
.flags
= MachineMemOperand::MOStore
;
14616 case Intrinsic::ppc_qpx_qvstfda
:
14617 case Intrinsic::ppc_qpx_qvstfsa
:
14618 case Intrinsic::ppc_qpx_qvstfcda
:
14619 case Intrinsic::ppc_qpx_qvstfcsa
:
14620 case Intrinsic::ppc_qpx_qvstfiwa
: {
14622 switch (Intrinsic
) {
14623 case Intrinsic::ppc_qpx_qvstfda
:
14626 case Intrinsic::ppc_qpx_qvstfsa
:
14629 case Intrinsic::ppc_qpx_qvstfcda
:
14632 case Intrinsic::ppc_qpx_qvstfcsa
:
14640 Info
.opc
= ISD::INTRINSIC_VOID
;
14642 Info
.ptrVal
= I
.getArgOperand(1);
14644 Info
.size
= VT
.getStoreSize();
14645 Info
.align
= Align(1);
14646 Info
.flags
= MachineMemOperand::MOStore
;
14656 /// getOptimalMemOpType - Returns the target specific optimal type for load
14657 /// and store operations as a result of memset, memcpy, and memmove
14658 /// lowering. If DstAlign is zero that means it's safe to destination
14659 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
14660 /// means there isn't a need to check it against alignment requirement,
14661 /// probably because the source does not need to be loaded. If 'IsMemset' is
14662 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14663 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14664 /// source is constant so it does not need to be loaded.
14665 /// It returns EVT::Other if the type should be determined using generic
14666 /// target-independent logic.
14667 EVT
PPCTargetLowering::getOptimalMemOpType(
14668 uint64_t Size
, unsigned DstAlign
, unsigned SrcAlign
, bool IsMemset
,
14669 bool ZeroMemset
, bool MemcpyStrSrc
,
14670 const AttributeList
&FuncAttributes
) const {
14671 if (getTargetMachine().getOptLevel() != CodeGenOpt::None
) {
14672 // When expanding a memset, require at least two QPX instructions to cover
14673 // the cost of loading the value to be stored from the constant pool.
14674 if (Subtarget
.hasQPX() && Size
>= 32 && (!IsMemset
|| Size
>= 64) &&
14675 (!SrcAlign
|| SrcAlign
>= 32) && (!DstAlign
|| DstAlign
>= 32) &&
14676 !FuncAttributes
.hasFnAttribute(Attribute::NoImplicitFloat
)) {
14680 // We should use Altivec/VSX loads and stores when available. For unaligned
14681 // addresses, unaligned VSX loads are only fast starting with the P8.
14682 if (Subtarget
.hasAltivec() && Size
>= 16 &&
14683 (((!SrcAlign
|| SrcAlign
>= 16) && (!DstAlign
|| DstAlign
>= 16)) ||
14684 ((IsMemset
&& Subtarget
.hasVSX()) || Subtarget
.hasP8Vector())))
14688 if (Subtarget
.isPPC64()) {
14695 /// Returns true if it is beneficial to convert a load of a constant
14696 /// to just the constant itself.
14697 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt
&Imm
,
14699 assert(Ty
->isIntegerTy());
14701 unsigned BitSize
= Ty
->getPrimitiveSizeInBits();
14702 return !(BitSize
== 0 || BitSize
> 64);
14705 bool PPCTargetLowering::isTruncateFree(Type
*Ty1
, Type
*Ty2
) const {
14706 if (!Ty1
->isIntegerTy() || !Ty2
->isIntegerTy())
14708 unsigned NumBits1
= Ty1
->getPrimitiveSizeInBits();
14709 unsigned NumBits2
= Ty2
->getPrimitiveSizeInBits();
14710 return NumBits1
== 64 && NumBits2
== 32;
14713 bool PPCTargetLowering::isTruncateFree(EVT VT1
, EVT VT2
) const {
14714 if (!VT1
.isInteger() || !VT2
.isInteger())
14716 unsigned NumBits1
= VT1
.getSizeInBits();
14717 unsigned NumBits2
= VT2
.getSizeInBits();
14718 return NumBits1
== 64 && NumBits2
== 32;
14721 bool PPCTargetLowering::isZExtFree(SDValue Val
, EVT VT2
) const {
14722 // Generally speaking, zexts are not free, but they are free when they can be
14723 // folded with other operations.
14724 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Val
)) {
14725 EVT MemVT
= LD
->getMemoryVT();
14726 if ((MemVT
== MVT::i1
|| MemVT
== MVT::i8
|| MemVT
== MVT::i16
||
14727 (Subtarget
.isPPC64() && MemVT
== MVT::i32
)) &&
14728 (LD
->getExtensionType() == ISD::NON_EXTLOAD
||
14729 LD
->getExtensionType() == ISD::ZEXTLOAD
))
14733 // FIXME: Add other cases...
14734 // - 32-bit shifts with a zext to i64
14735 // - zext after ctlz, bswap, etc.
14736 // - zext after and by a constant mask
14738 return TargetLowering::isZExtFree(Val
, VT2
);
14741 bool PPCTargetLowering::isFPExtFree(EVT DestVT
, EVT SrcVT
) const {
14742 assert(DestVT
.isFloatingPoint() && SrcVT
.isFloatingPoint() &&
14743 "invalid fpext types");
14744 // Extending to float128 is not free.
14745 if (DestVT
== MVT::f128
)
14750 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm
) const {
14751 return isInt
<16>(Imm
) || isUInt
<16>(Imm
);
14754 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm
) const {
14755 return isInt
<16>(Imm
) || isUInt
<16>(Imm
);
14758 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT
,
14761 MachineMemOperand::Flags
,
14762 bool *Fast
) const {
14763 if (DisablePPCUnaligned
)
14766 // PowerPC supports unaligned memory access for simple non-vector types.
14767 // Although accessing unaligned addresses is not as efficient as accessing
14768 // aligned addresses, it is generally more efficient than manual expansion,
14769 // and generally only traps for software emulation when crossing page
14772 if (!VT
.isSimple())
14775 if (VT
.getSimpleVT().isVector()) {
14776 if (Subtarget
.hasVSX()) {
14777 if (VT
!= MVT::v2f64
&& VT
!= MVT::v2i64
&&
14778 VT
!= MVT::v4f32
&& VT
!= MVT::v4i32
)
14785 if (VT
== MVT::ppcf128
)
14794 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT
) const {
14795 VT
= VT
.getScalarType();
14797 if (!VT
.isSimple())
14800 switch (VT
.getSimpleVT().SimpleTy
) {
14805 return (EnableQuadPrecision
&& Subtarget
.hasP9Vector());
14814 PPCTargetLowering::getScratchRegisters(CallingConv::ID
) const {
14815 // LR is a callee-save register, but we must treat it as clobbered by any call
14816 // site. Hence we include LR in the scratch registers, which are in turn added
14817 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14818 // to CTR, which is used by any indirect call.
14819 static const MCPhysReg ScratchRegs
[] = {
14820 PPC::X12
, PPC::LR8
, PPC::CTR8
, 0
14823 return ScratchRegs
;
14826 unsigned PPCTargetLowering::getExceptionPointerRegister(
14827 const Constant
*PersonalityFn
) const {
14828 return Subtarget
.isPPC64() ? PPC::X3
: PPC::R3
;
14831 unsigned PPCTargetLowering::getExceptionSelectorRegister(
14832 const Constant
*PersonalityFn
) const {
14833 return Subtarget
.isPPC64() ? PPC::X4
: PPC::R4
;
14837 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14838 EVT VT
, unsigned DefinedValues
) const {
14839 if (VT
== MVT::v2i64
)
14840 return Subtarget
.hasDirectMove(); // Don't need stack ops with direct moves
14842 if (Subtarget
.hasVSX() || Subtarget
.hasQPX())
14845 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT
, DefinedValues
);
14848 Sched::Preference
PPCTargetLowering::getSchedulingPreference(SDNode
*N
) const {
14849 if (DisableILPPref
|| Subtarget
.enableMachineScheduler())
14850 return TargetLowering::getSchedulingPreference(N
);
14855 // Create a fast isel object.
14857 PPCTargetLowering::createFastISel(FunctionLoweringInfo
&FuncInfo
,
14858 const TargetLibraryInfo
*LibInfo
) const {
14859 return PPC::createFastISel(FuncInfo
, LibInfo
);
14862 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock
*Entry
) const {
14863 if (Subtarget
.isDarwinABI()) return;
14864 if (!Subtarget
.isPPC64()) return;
14866 // Update IsSplitCSR in PPCFunctionInfo
14867 PPCFunctionInfo
*PFI
= Entry
->getParent()->getInfo
<PPCFunctionInfo
>();
14868 PFI
->setIsSplitCSR(true);
14871 void PPCTargetLowering::insertCopiesSplitCSR(
14872 MachineBasicBlock
*Entry
,
14873 const SmallVectorImpl
<MachineBasicBlock
*> &Exits
) const {
14874 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
14875 const MCPhysReg
*IStart
= TRI
->getCalleeSavedRegsViaCopy(Entry
->getParent());
14879 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
14880 MachineRegisterInfo
*MRI
= &Entry
->getParent()->getRegInfo();
14881 MachineBasicBlock::iterator MBBI
= Entry
->begin();
14882 for (const MCPhysReg
*I
= IStart
; *I
; ++I
) {
14883 const TargetRegisterClass
*RC
= nullptr;
14884 if (PPC::G8RCRegClass
.contains(*I
))
14885 RC
= &PPC::G8RCRegClass
;
14886 else if (PPC::F8RCRegClass
.contains(*I
))
14887 RC
= &PPC::F8RCRegClass
;
14888 else if (PPC::CRRCRegClass
.contains(*I
))
14889 RC
= &PPC::CRRCRegClass
;
14890 else if (PPC::VRRCRegClass
.contains(*I
))
14891 RC
= &PPC::VRRCRegClass
;
14893 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14895 Register NewVR
= MRI
->createVirtualRegister(RC
);
14896 // Create copy from CSR to a virtual register.
14897 // FIXME: this currently does not emit CFI pseudo-instructions, it works
14898 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14899 // nounwind. If we want to generalize this later, we may need to emit
14900 // CFI pseudo-instructions.
14901 assert(Entry
->getParent()->getFunction().hasFnAttribute(
14902 Attribute::NoUnwind
) &&
14903 "Function should be nounwind in insertCopiesSplitCSR!");
14904 Entry
->addLiveIn(*I
);
14905 BuildMI(*Entry
, MBBI
, DebugLoc(), TII
->get(TargetOpcode::COPY
), NewVR
)
14908 // Insert the copy-back instructions right before the terminator.
14909 for (auto *Exit
: Exits
)
14910 BuildMI(*Exit
, Exit
->getFirstTerminator(), DebugLoc(),
14911 TII
->get(TargetOpcode::COPY
), *I
)
14916 // Override to enable LOAD_STACK_GUARD lowering on Linux.
14917 bool PPCTargetLowering::useLoadStackGuardNode() const {
14918 if (!Subtarget
.isTargetLinux())
14919 return TargetLowering::useLoadStackGuardNode();
14923 // Override to disable global variable loading on Linux.
14924 void PPCTargetLowering::insertSSPDeclarations(Module
&M
) const {
14925 if (!Subtarget
.isTargetLinux())
14926 return TargetLowering::insertSSPDeclarations(M
);
14929 bool PPCTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
,
14930 bool ForCodeSize
) const {
14931 if (!VT
.isSimple() || !Subtarget
.hasVSX())
14934 switch(VT
.getSimpleVT().SimpleTy
) {
14936 // For FP types that are currently not supported by PPC backend, return
14937 // false. Examples: f16, f80.
14942 return Imm
.isPosZero();
14946 // For vector shift operation op, fold
14947 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14948 static SDValue
stripModuloOnShift(const TargetLowering
&TLI
, SDNode
*N
,
14949 SelectionDAG
&DAG
) {
14950 SDValue N0
= N
->getOperand(0);
14951 SDValue N1
= N
->getOperand(1);
14952 EVT VT
= N0
.getValueType();
14953 unsigned OpSizeInBits
= VT
.getScalarSizeInBits();
14954 unsigned Opcode
= N
->getOpcode();
14955 unsigned TargetOpcode
;
14959 llvm_unreachable("Unexpected shift operation");
14961 TargetOpcode
= PPCISD::SHL
;
14964 TargetOpcode
= PPCISD::SRL
;
14967 TargetOpcode
= PPCISD::SRA
;
14971 if (VT
.isVector() && TLI
.isOperationLegal(Opcode
, VT
) &&
14972 N1
->getOpcode() == ISD::AND
)
14973 if (ConstantSDNode
*Mask
= isConstOrConstSplat(N1
->getOperand(1)))
14974 if (Mask
->getZExtValue() == OpSizeInBits
- 1)
14975 return DAG
.getNode(TargetOpcode
, SDLoc(N
), VT
, N0
, N1
->getOperand(0));
14980 SDValue
PPCTargetLowering::combineSHL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
14981 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
14984 SDValue N0
= N
->getOperand(0);
14985 ConstantSDNode
*CN1
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
14986 if (!Subtarget
.isISA3_0() ||
14987 N0
.getOpcode() != ISD::SIGN_EXTEND
||
14988 N0
.getOperand(0).getValueType() != MVT::i32
||
14989 CN1
== nullptr || N
->getValueType(0) != MVT::i64
)
14992 // We can't save an operation here if the value is already extended, and
14993 // the existing shift is easier to combine.
14994 SDValue ExtsSrc
= N0
.getOperand(0);
14995 if (ExtsSrc
.getOpcode() == ISD::TRUNCATE
&&
14996 ExtsSrc
.getOperand(0).getOpcode() == ISD::AssertSext
)
15000 SDValue ShiftBy
= SDValue(CN1
, 0);
15001 // We want the shift amount to be i32 on the extswli, but the shift could
15003 if (ShiftBy
.getValueType() == MVT::i64
)
15004 ShiftBy
= DCI
.DAG
.getConstant(CN1
->getZExtValue(), DL
, MVT::i32
);
15006 return DCI
.DAG
.getNode(PPCISD::EXTSWSLI
, DL
, MVT::i64
, N0
->getOperand(0),
15010 SDValue
PPCTargetLowering::combineSRA(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15011 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
15017 SDValue
PPCTargetLowering::combineSRL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15018 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
15024 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
15025 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
15026 // When C is zero, the equation (addi Z, -C) can be simplified to Z
15027 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
15028 static SDValue
combineADDToADDZE(SDNode
*N
, SelectionDAG
&DAG
,
15029 const PPCSubtarget
&Subtarget
) {
15030 if (!Subtarget
.isPPC64())
15033 SDValue LHS
= N
->getOperand(0);
15034 SDValue RHS
= N
->getOperand(1);
15036 auto isZextOfCompareWithConstant
= [](SDValue Op
) {
15037 if (Op
.getOpcode() != ISD::ZERO_EXTEND
|| !Op
.hasOneUse() ||
15038 Op
.getValueType() != MVT::i64
)
15041 SDValue Cmp
= Op
.getOperand(0);
15042 if (Cmp
.getOpcode() != ISD::SETCC
|| !Cmp
.hasOneUse() ||
15043 Cmp
.getOperand(0).getValueType() != MVT::i64
)
15046 if (auto *Constant
= dyn_cast
<ConstantSDNode
>(Cmp
.getOperand(1))) {
15047 int64_t NegConstant
= 0 - Constant
->getSExtValue();
15048 // Due to the limitations of the addi instruction,
15049 // -C is required to be [-32768, 32767].
15050 return isInt
<16>(NegConstant
);
15056 bool LHSHasPattern
= isZextOfCompareWithConstant(LHS
);
15057 bool RHSHasPattern
= isZextOfCompareWithConstant(RHS
);
15059 // If there is a pattern, canonicalize a zext operand to the RHS.
15060 if (LHSHasPattern
&& !RHSHasPattern
)
15061 std::swap(LHS
, RHS
);
15062 else if (!LHSHasPattern
&& !RHSHasPattern
)
15066 SDVTList VTs
= DAG
.getVTList(MVT::i64
, MVT::Glue
);
15067 SDValue Cmp
= RHS
.getOperand(0);
15068 SDValue Z
= Cmp
.getOperand(0);
15069 auto *Constant
= dyn_cast
<ConstantSDNode
>(Cmp
.getOperand(1));
15071 assert(Constant
&& "Constant Should not be a null pointer.");
15072 int64_t NegConstant
= 0 - Constant
->getSExtValue();
15074 switch(cast
<CondCodeSDNode
>(Cmp
.getOperand(2))->get()) {
15078 // --> addze X, (addic Z, -1).carry
15080 // add X, (zext(setne Z, C))--
15081 // \ when -32768 <= -C <= 32767 && C != 0
15082 // --> addze X, (addic (addi Z, -C), -1).carry
15083 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, Z
,
15084 DAG
.getConstant(NegConstant
, DL
, MVT::i64
));
15085 SDValue AddOrZ
= NegConstant
!= 0 ? Add
: Z
;
15086 SDValue Addc
= DAG
.getNode(ISD::ADDC
, DL
, DAG
.getVTList(MVT::i64
, MVT::Glue
),
15087 AddOrZ
, DAG
.getConstant(-1ULL, DL
, MVT::i64
));
15088 return DAG
.getNode(ISD::ADDE
, DL
, VTs
, LHS
, DAG
.getConstant(0, DL
, MVT::i64
),
15089 SDValue(Addc
.getNode(), 1));
15093 // --> addze X, (subfic Z, 0).carry
15095 // add X, (zext(sete Z, C))--
15096 // \ when -32768 <= -C <= 32767 && C != 0
15097 // --> addze X, (subfic (addi Z, -C), 0).carry
15098 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, Z
,
15099 DAG
.getConstant(NegConstant
, DL
, MVT::i64
));
15100 SDValue AddOrZ
= NegConstant
!= 0 ? Add
: Z
;
15101 SDValue Subc
= DAG
.getNode(ISD::SUBC
, DL
, DAG
.getVTList(MVT::i64
, MVT::Glue
),
15102 DAG
.getConstant(0, DL
, MVT::i64
), AddOrZ
);
15103 return DAG
.getNode(ISD::ADDE
, DL
, VTs
, LHS
, DAG
.getConstant(0, DL
, MVT::i64
),
15104 SDValue(Subc
.getNode(), 1));
15111 SDValue
PPCTargetLowering::combineADD(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15112 if (auto Value
= combineADDToADDZE(N
, DCI
.DAG
, Subtarget
))
15118 // Detect TRUNCATE operations on bitcasts of float128 values.
15119 // What we are looking for here is the situtation where we extract a subset
15120 // of bits from a 128 bit float.
15121 // This can be of two forms:
15122 // 1) BITCAST of f128 feeding TRUNCATE
15123 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
15124 // The reason this is required is because we do not have a legal i128 type
15125 // and so we want to prevent having to store the f128 and then reload part
15127 SDValue
PPCTargetLowering::combineTRUNCATE(SDNode
*N
,
15128 DAGCombinerInfo
&DCI
) const {
15129 // If we are using CRBits then try that first.
15130 if (Subtarget
.useCRBits()) {
15131 // Check if CRBits did anything and return that if it did.
15132 if (SDValue CRTruncValue
= DAGCombineTruncBoolExt(N
, DCI
))
15133 return CRTruncValue
;
15137 SDValue Op0
= N
->getOperand(0);
15139 // Looking for a truncate of i128 to i64.
15140 if (Op0
.getValueType() != MVT::i128
|| N
->getValueType(0) != MVT::i64
)
15143 int EltToExtract
= DCI
.DAG
.getDataLayout().isBigEndian() ? 1 : 0;
15145 // SRL feeding TRUNCATE.
15146 if (Op0
.getOpcode() == ISD::SRL
) {
15147 ConstantSDNode
*ConstNode
= dyn_cast
<ConstantSDNode
>(Op0
.getOperand(1));
15148 // The right shift has to be by 64 bits.
15149 if (!ConstNode
|| ConstNode
->getZExtValue() != 64)
15152 // Switch the element number to extract.
15153 EltToExtract
= EltToExtract
? 0 : 1;
15154 // Update Op0 past the SRL.
15155 Op0
= Op0
.getOperand(0);
15158 // BITCAST feeding a TRUNCATE possibly via SRL.
15159 if (Op0
.getOpcode() == ISD::BITCAST
&&
15160 Op0
.getValueType() == MVT::i128
&&
15161 Op0
.getOperand(0).getValueType() == MVT::f128
) {
15162 SDValue Bitcast
= DCI
.DAG
.getBitcast(MVT::v2i64
, Op0
.getOperand(0));
15163 return DCI
.DAG
.getNode(
15164 ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::i64
, Bitcast
,
15165 DCI
.DAG
.getTargetConstant(EltToExtract
, dl
, MVT::i32
));
15170 SDValue
PPCTargetLowering::combineMUL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15171 SelectionDAG
&DAG
= DCI
.DAG
;
15173 ConstantSDNode
*ConstOpOrElement
= isConstOrConstSplat(N
->getOperand(1));
15174 if (!ConstOpOrElement
)
15177 // An imul is usually smaller than the alternative sequence for legal type.
15178 if (DAG
.getMachineFunction().getFunction().hasMinSize() &&
15179 isOperationLegal(ISD::MUL
, N
->getValueType(0)))
15182 auto IsProfitable
= [this](bool IsNeg
, bool IsAddOne
, EVT VT
) -> bool {
15183 switch (this->Subtarget
.getDarwinDirective()) {
15185 // TODO: enhance the condition for subtarget before pwr8
15187 case PPC::DIR_PWR8
:
15188 // type mul add shl
15192 case PPC::DIR_PWR9
:
15193 // type mul add shl
15197 // The cycle RATIO of related operations are showed as a table above.
15198 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
15199 // scalar and vector type. For 2 instrs patterns, add/sub + shl
15200 // are 4, it is always profitable; but for 3 instrs patterns
15201 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
15202 // So we should only do it for vector type.
15203 return IsAddOne
&& IsNeg
? VT
.isVector() : true;
15207 EVT VT
= N
->getValueType(0);
15210 const APInt
&MulAmt
= ConstOpOrElement
->getAPIntValue();
15211 bool IsNeg
= MulAmt
.isNegative();
15212 APInt MulAmtAbs
= MulAmt
.abs();
15214 if ((MulAmtAbs
- 1).isPowerOf2()) {
15215 // (mul x, 2^N + 1) => (add (shl x, N), x)
15216 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15218 if (!IsProfitable(IsNeg
, true, VT
))
15221 SDValue Op0
= N
->getOperand(0);
15223 DAG
.getNode(ISD::SHL
, DL
, VT
, N
->getOperand(0),
15224 DAG
.getConstant((MulAmtAbs
- 1).logBase2(), DL
, VT
));
15225 SDValue Res
= DAG
.getNode(ISD::ADD
, DL
, VT
, Op0
, Op1
);
15230 return DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, DL
, VT
), Res
);
15231 } else if ((MulAmtAbs
+ 1).isPowerOf2()) {
15232 // (mul x, 2^N - 1) => (sub (shl x, N), x)
15233 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15235 if (!IsProfitable(IsNeg
, false, VT
))
15238 SDValue Op0
= N
->getOperand(0);
15240 DAG
.getNode(ISD::SHL
, DL
, VT
, N
->getOperand(0),
15241 DAG
.getConstant((MulAmtAbs
+ 1).logBase2(), DL
, VT
));
15244 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op1
, Op0
);
15246 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op0
, Op1
);
15253 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst
*CI
) const {
15254 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
15255 if (!Subtarget
.is64BitELFABI())
15258 // If not a tail call then no need to proceed.
15259 if (!CI
->isTailCall())
15262 // If tail calls are disabled for the caller then we are done.
15263 const Function
*Caller
= CI
->getParent()->getParent();
15264 auto Attr
= Caller
->getFnAttribute("disable-tail-calls");
15265 if (Attr
.getValueAsString() == "true")
15268 // If sibling calls have been disabled and tail-calls aren't guaranteed
15269 // there is no reason to duplicate.
15270 auto &TM
= getTargetMachine();
15271 if (!TM
.Options
.GuaranteedTailCallOpt
&& DisableSCO
)
15274 // Can't tail call a function called indirectly, or if it has variadic args.
15275 const Function
*Callee
= CI
->getCalledFunction();
15276 if (!Callee
|| Callee
->isVarArg())
15279 // Make sure the callee and caller calling conventions are eligible for tco.
15280 if (!areCallingConvEligibleForTCO_64SVR4(Caller
->getCallingConv(),
15281 CI
->getCallingConv()))
15284 // If the function is local then we have a good chance at tail-calling it
15285 return getTargetMachine().shouldAssumeDSOLocal(*Caller
->getParent(), Callee
);
15288 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT
) const {
15289 if (!Subtarget
.hasVSX())
15291 if (Subtarget
.hasP9Vector() && VT
== MVT::f128
)
15293 return VT
== MVT::f32
|| VT
== MVT::f64
||
15294 VT
== MVT::v4f32
|| VT
== MVT::v2f64
;
15297 bool PPCTargetLowering::
15298 isMaskAndCmp0FoldingBeneficial(const Instruction
&AndI
) const {
15299 const Value
*Mask
= AndI
.getOperand(1);
15300 // If the mask is suitable for andi. or andis. we should sink the and.
15301 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Mask
)) {
15302 // Can't handle constants wider than 64-bits.
15303 if (CI
->getBitWidth() > 64)
15305 int64_t ConstVal
= CI
->getZExtValue();
15306 return isUInt
<16>(ConstVal
) ||
15307 (isUInt
<16>(ConstVal
>> 16) && !(ConstVal
& 0xFFFF));
15310 // For non-constant masks, we can always use the record-form and.
15314 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15315 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15316 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15317 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15318 // Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
15319 SDValue
PPCTargetLowering::combineABS(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15320 assert((N
->getOpcode() == ISD::ABS
) && "Need ABS node here");
15321 assert(Subtarget
.hasP9Altivec() &&
15322 "Only combine this when P9 altivec supported!");
15323 EVT VT
= N
->getValueType(0);
15324 if (VT
!= MVT::v4i32
&& VT
!= MVT::v8i16
&& VT
!= MVT::v16i8
)
15327 SelectionDAG
&DAG
= DCI
.DAG
;
15329 if (N
->getOperand(0).getOpcode() == ISD::SUB
) {
15330 // Even for signed integers, if it's known to be positive (as signed
15331 // integer) due to zero-extended inputs.
15332 unsigned SubOpcd0
= N
->getOperand(0)->getOperand(0).getOpcode();
15333 unsigned SubOpcd1
= N
->getOperand(0)->getOperand(1).getOpcode();
15334 if ((SubOpcd0
== ISD::ZERO_EXTEND
||
15335 SubOpcd0
== ISD::ZERO_EXTEND_VECTOR_INREG
) &&
15336 (SubOpcd1
== ISD::ZERO_EXTEND
||
15337 SubOpcd1
== ISD::ZERO_EXTEND_VECTOR_INREG
)) {
15338 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(0).getValueType(),
15339 N
->getOperand(0)->getOperand(0),
15340 N
->getOperand(0)->getOperand(1),
15341 DAG
.getTargetConstant(0, dl
, MVT::i32
));
15344 // For type v4i32, it can be optimized with xvnegsp + vabsduw
15345 if (N
->getOperand(0).getValueType() == MVT::v4i32
&&
15346 N
->getOperand(0).hasOneUse()) {
15347 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(0).getValueType(),
15348 N
->getOperand(0)->getOperand(0),
15349 N
->getOperand(0)->getOperand(1),
15350 DAG
.getTargetConstant(1, dl
, MVT::i32
));
15357 // For type v4i32/v8i16/v16i8, transform
15358 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15359 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15360 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15361 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15362 SDValue
PPCTargetLowering::combineVSelect(SDNode
*N
,
15363 DAGCombinerInfo
&DCI
) const {
15364 assert((N
->getOpcode() == ISD::VSELECT
) && "Need VSELECT node here");
15365 assert(Subtarget
.hasP9Altivec() &&
15366 "Only combine this when P9 altivec supported!");
15368 SelectionDAG
&DAG
= DCI
.DAG
;
15370 SDValue Cond
= N
->getOperand(0);
15371 SDValue TrueOpnd
= N
->getOperand(1);
15372 SDValue FalseOpnd
= N
->getOperand(2);
15373 EVT VT
= N
->getOperand(1).getValueType();
15375 if (Cond
.getOpcode() != ISD::SETCC
|| TrueOpnd
.getOpcode() != ISD::SUB
||
15376 FalseOpnd
.getOpcode() != ISD::SUB
)
15379 // ABSD only available for type v4i32/v8i16/v16i8
15380 if (VT
!= MVT::v4i32
&& VT
!= MVT::v8i16
&& VT
!= MVT::v16i8
)
15383 // At least to save one more dependent computation
15384 if (!(Cond
.hasOneUse() || TrueOpnd
.hasOneUse() || FalseOpnd
.hasOneUse()))
15387 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Cond
.getOperand(2))->get();
15389 // Can only handle unsigned comparison here
15398 std::swap(TrueOpnd
, FalseOpnd
);
15402 SDValue CmpOpnd1
= Cond
.getOperand(0);
15403 SDValue CmpOpnd2
= Cond
.getOperand(1);
15405 // SETCC CmpOpnd1 CmpOpnd2 cond
15406 // TrueOpnd = CmpOpnd1 - CmpOpnd2
15407 // FalseOpnd = CmpOpnd2 - CmpOpnd1
15408 if (TrueOpnd
.getOperand(0) == CmpOpnd1
&&
15409 TrueOpnd
.getOperand(1) == CmpOpnd2
&&
15410 FalseOpnd
.getOperand(0) == CmpOpnd2
&&
15411 FalseOpnd
.getOperand(1) == CmpOpnd1
) {
15412 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(1).getValueType(),
15413 CmpOpnd1
, CmpOpnd2
,
15414 DAG
.getTargetConstant(0, dl
, MVT::i32
));