1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the PPCISelLowering class.
11 //===----------------------------------------------------------------------===//
13 #include "PPCISelLowering.h"
14 #include "MCTargetDesc/PPCPredicates.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
37 #include "llvm/CodeGen/CallingConvLower.h"
38 #include "llvm/CodeGen/ISDOpcodes.h"
39 #include "llvm/CodeGen/MachineBasicBlock.h"
40 #include "llvm/CodeGen/MachineFrameInfo.h"
41 #include "llvm/CodeGen/MachineFunction.h"
42 #include "llvm/CodeGen/MachineInstr.h"
43 #include "llvm/CodeGen/MachineInstrBuilder.h"
44 #include "llvm/CodeGen/MachineJumpTableInfo.h"
45 #include "llvm/CodeGen/MachineLoopInfo.h"
46 #include "llvm/CodeGen/MachineMemOperand.h"
47 #include "llvm/CodeGen/MachineModuleInfo.h"
48 #include "llvm/CodeGen/MachineOperand.h"
49 #include "llvm/CodeGen/MachineRegisterInfo.h"
50 #include "llvm/CodeGen/RuntimeLibcalls.h"
51 #include "llvm/CodeGen/SelectionDAG.h"
52 #include "llvm/CodeGen/SelectionDAGNodes.h"
53 #include "llvm/CodeGen/TargetInstrInfo.h"
54 #include "llvm/CodeGen/TargetLowering.h"
55 #include "llvm/CodeGen/TargetRegisterInfo.h"
56 #include "llvm/CodeGen/ValueTypes.h"
57 #include "llvm/IR/CallSite.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/Module.h"
70 #include "llvm/IR/Type.h"
71 #include "llvm/IR/Use.h"
72 #include "llvm/IR/Value.h"
73 #include "llvm/MC/MCContext.h"
74 #include "llvm/MC/MCExpr.h"
75 #include "llvm/MC/MCRegisterInfo.h"
76 #include "llvm/MC/MCSymbolXCOFF.h"
77 #include "llvm/Support/AtomicOrdering.h"
78 #include "llvm/Support/BranchProbability.h"
79 #include "llvm/Support/Casting.h"
80 #include "llvm/Support/CodeGen.h"
81 #include "llvm/Support/CommandLine.h"
82 #include "llvm/Support/Compiler.h"
83 #include "llvm/Support/Debug.h"
84 #include "llvm/Support/ErrorHandling.h"
85 #include "llvm/Support/Format.h"
86 #include "llvm/Support/KnownBits.h"
87 #include "llvm/Support/MachineValueType.h"
88 #include "llvm/Support/MathExtras.h"
89 #include "llvm/Support/raw_ostream.h"
90 #include "llvm/Target/TargetMachine.h"
91 #include "llvm/Target/TargetOptions.h"
100 using namespace llvm
;
102 #define DEBUG_TYPE "ppc-lowering"
104 static cl::opt
<bool> DisablePPCPreinc("disable-ppc-preinc",
105 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden
);
107 static cl::opt
<bool> DisableILPPref("disable-ppc-ilp-pref",
108 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden
);
110 static cl::opt
<bool> DisablePPCUnaligned("disable-ppc-unaligned",
111 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden
);
113 static cl::opt
<bool> DisableSCO("disable-ppc-sco",
114 cl::desc("disable sibling call optimization on ppc"), cl::Hidden
);
116 static cl::opt
<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
117 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden
);
119 static cl::opt
<bool> EnableQuadPrecision("enable-ppc-quad-precision",
120 cl::desc("enable quad precision float support on ppc"), cl::Hidden
);
122 STATISTIC(NumTailCalls
, "Number of tail calls");
123 STATISTIC(NumSiblingCalls
, "Number of sibling calls");
125 static bool isNByteElemShuffleMask(ShuffleVectorSDNode
*, unsigned, int);
127 static SDValue
widenVec(SelectionDAG
&DAG
, SDValue Vec
, const SDLoc
&dl
);
129 // FIXME: Remove this once the bug has been fixed!
130 extern cl::opt
<bool> ANDIGlueBug
;
132 PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine
&TM
,
133 const PPCSubtarget
&STI
)
134 : TargetLowering(TM
), Subtarget(STI
) {
135 // Use _setjmp/_longjmp instead of setjmp/longjmp.
136 setUseUnderscoreSetJmp(true);
137 setUseUnderscoreLongJmp(true);
139 // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
140 // arguments are at least 4/8 bytes aligned.
141 bool isPPC64
= Subtarget
.isPPC64();
142 setMinStackArgumentAlignment(isPPC64
? 8:4);
144 // Set up the register classes.
145 addRegisterClass(MVT::i32
, &PPC::GPRCRegClass
);
146 if (!useSoftFloat()) {
148 addRegisterClass(MVT::f32
, &PPC::SPE4RCRegClass
);
149 addRegisterClass(MVT::f64
, &PPC::SPERCRegClass
);
151 addRegisterClass(MVT::f32
, &PPC::F4RCRegClass
);
152 addRegisterClass(MVT::f64
, &PPC::F8RCRegClass
);
156 // Match BITREVERSE to customized fast code sequence in the td file.
157 setOperationAction(ISD::BITREVERSE
, MVT::i32
, Legal
);
158 setOperationAction(ISD::BITREVERSE
, MVT::i64
, Legal
);
160 // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
161 setOperationAction(ISD::ATOMIC_CMP_SWAP
, MVT::i32
, Custom
);
163 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
164 for (MVT VT
: MVT::integer_valuetypes()) {
165 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i1
, Promote
);
166 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i8
, Expand
);
169 setTruncStoreAction(MVT::f64
, MVT::f32
, Expand
);
171 // PowerPC has pre-inc load and store's.
172 setIndexedLoadAction(ISD::PRE_INC
, MVT::i1
, Legal
);
173 setIndexedLoadAction(ISD::PRE_INC
, MVT::i8
, Legal
);
174 setIndexedLoadAction(ISD::PRE_INC
, MVT::i16
, Legal
);
175 setIndexedLoadAction(ISD::PRE_INC
, MVT::i32
, Legal
);
176 setIndexedLoadAction(ISD::PRE_INC
, MVT::i64
, Legal
);
177 setIndexedStoreAction(ISD::PRE_INC
, MVT::i1
, Legal
);
178 setIndexedStoreAction(ISD::PRE_INC
, MVT::i8
, Legal
);
179 setIndexedStoreAction(ISD::PRE_INC
, MVT::i16
, Legal
);
180 setIndexedStoreAction(ISD::PRE_INC
, MVT::i32
, Legal
);
181 setIndexedStoreAction(ISD::PRE_INC
, MVT::i64
, Legal
);
182 if (!Subtarget
.hasSPE()) {
183 setIndexedLoadAction(ISD::PRE_INC
, MVT::f32
, Legal
);
184 setIndexedLoadAction(ISD::PRE_INC
, MVT::f64
, Legal
);
185 setIndexedStoreAction(ISD::PRE_INC
, MVT::f32
, Legal
);
186 setIndexedStoreAction(ISD::PRE_INC
, MVT::f64
, Legal
);
189 // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
190 const MVT ScalarIntVTs
[] = { MVT::i32
, MVT::i64
};
191 for (MVT VT
: ScalarIntVTs
) {
192 setOperationAction(ISD::ADDC
, VT
, Legal
);
193 setOperationAction(ISD::ADDE
, VT
, Legal
);
194 setOperationAction(ISD::SUBC
, VT
, Legal
);
195 setOperationAction(ISD::SUBE
, VT
, Legal
);
198 if (Subtarget
.useCRBits()) {
199 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
201 if (isPPC64
|| Subtarget
.hasFPCVT()) {
202 setOperationAction(ISD::SINT_TO_FP
, MVT::i1
, Promote
);
203 AddPromotedToType (ISD::SINT_TO_FP
, MVT::i1
,
204 isPPC64
? MVT::i64
: MVT::i32
);
205 setOperationAction(ISD::UINT_TO_FP
, MVT::i1
, Promote
);
206 AddPromotedToType(ISD::UINT_TO_FP
, MVT::i1
,
207 isPPC64
? MVT::i64
: MVT::i32
);
209 setOperationAction(ISD::SINT_TO_FP
, MVT::i1
, Custom
);
210 setOperationAction(ISD::UINT_TO_FP
, MVT::i1
, Custom
);
213 // PowerPC does not support direct load/store of condition registers.
214 setOperationAction(ISD::LOAD
, MVT::i1
, Custom
);
215 setOperationAction(ISD::STORE
, MVT::i1
, Custom
);
217 // FIXME: Remove this once the ANDI glue bug is fixed:
219 setOperationAction(ISD::TRUNCATE
, MVT::i1
, Custom
);
221 for (MVT VT
: MVT::integer_valuetypes()) {
222 setLoadExtAction(ISD::SEXTLOAD
, VT
, MVT::i1
, Promote
);
223 setLoadExtAction(ISD::ZEXTLOAD
, VT
, MVT::i1
, Promote
);
224 setTruncStoreAction(VT
, MVT::i1
, Expand
);
227 addRegisterClass(MVT::i1
, &PPC::CRBITRCRegClass
);
230 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
231 // PPC (the libcall is not available).
232 setOperationAction(ISD::FP_TO_SINT
, MVT::ppcf128
, Custom
);
233 setOperationAction(ISD::FP_TO_UINT
, MVT::ppcf128
, Custom
);
235 // We do not currently implement these libm ops for PowerPC.
236 setOperationAction(ISD::FFLOOR
, MVT::ppcf128
, Expand
);
237 setOperationAction(ISD::FCEIL
, MVT::ppcf128
, Expand
);
238 setOperationAction(ISD::FTRUNC
, MVT::ppcf128
, Expand
);
239 setOperationAction(ISD::FRINT
, MVT::ppcf128
, Expand
);
240 setOperationAction(ISD::FNEARBYINT
, MVT::ppcf128
, Expand
);
241 setOperationAction(ISD::FREM
, MVT::ppcf128
, Expand
);
243 // PowerPC has no SREM/UREM instructions unless we are on P9
244 // On P9 we may use a hardware instruction to compute the remainder.
245 // The instructions are not legalized directly because in the cases where the
246 // result of both the remainder and the division is required it is more
247 // efficient to compute the remainder from the result of the division rather
248 // than use the remainder instruction.
249 if (Subtarget
.isISA3_0()) {
250 setOperationAction(ISD::SREM
, MVT::i32
, Custom
);
251 setOperationAction(ISD::UREM
, MVT::i32
, Custom
);
252 setOperationAction(ISD::SREM
, MVT::i64
, Custom
);
253 setOperationAction(ISD::UREM
, MVT::i64
, Custom
);
255 setOperationAction(ISD::SREM
, MVT::i32
, Expand
);
256 setOperationAction(ISD::UREM
, MVT::i32
, Expand
);
257 setOperationAction(ISD::SREM
, MVT::i64
, Expand
);
258 setOperationAction(ISD::UREM
, MVT::i64
, Expand
);
261 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
262 setOperationAction(ISD::UMUL_LOHI
, MVT::i32
, Expand
);
263 setOperationAction(ISD::SMUL_LOHI
, MVT::i32
, Expand
);
264 setOperationAction(ISD::UMUL_LOHI
, MVT::i64
, Expand
);
265 setOperationAction(ISD::SMUL_LOHI
, MVT::i64
, Expand
);
266 setOperationAction(ISD::UDIVREM
, MVT::i32
, Expand
);
267 setOperationAction(ISD::SDIVREM
, MVT::i32
, Expand
);
268 setOperationAction(ISD::UDIVREM
, MVT::i64
, Expand
);
269 setOperationAction(ISD::SDIVREM
, MVT::i64
, Expand
);
271 // We don't support sin/cos/sqrt/fmod/pow
272 setOperationAction(ISD::FSIN
, MVT::f64
, Expand
);
273 setOperationAction(ISD::FCOS
, MVT::f64
, Expand
);
274 setOperationAction(ISD::FSINCOS
, MVT::f64
, Expand
);
275 setOperationAction(ISD::FREM
, MVT::f64
, Expand
);
276 setOperationAction(ISD::FPOW
, MVT::f64
, Expand
);
277 setOperationAction(ISD::FSIN
, MVT::f32
, Expand
);
278 setOperationAction(ISD::FCOS
, MVT::f32
, Expand
);
279 setOperationAction(ISD::FSINCOS
, MVT::f32
, Expand
);
280 setOperationAction(ISD::FREM
, MVT::f32
, Expand
);
281 setOperationAction(ISD::FPOW
, MVT::f32
, Expand
);
282 if (Subtarget
.hasSPE()) {
283 setOperationAction(ISD::FMA
, MVT::f64
, Expand
);
284 setOperationAction(ISD::FMA
, MVT::f32
, Expand
);
286 setOperationAction(ISD::FMA
, MVT::f64
, Legal
);
287 setOperationAction(ISD::FMA
, MVT::f32
, Legal
);
290 setOperationAction(ISD::FLT_ROUNDS_
, MVT::i32
, Custom
);
292 // If we're enabling GP optimizations, use hardware square root
293 if (!Subtarget
.hasFSQRT() &&
294 !(TM
.Options
.UnsafeFPMath
&& Subtarget
.hasFRSQRTE() &&
296 setOperationAction(ISD::FSQRT
, MVT::f64
, Expand
);
298 if (!Subtarget
.hasFSQRT() &&
299 !(TM
.Options
.UnsafeFPMath
&& Subtarget
.hasFRSQRTES() &&
300 Subtarget
.hasFRES()))
301 setOperationAction(ISD::FSQRT
, MVT::f32
, Expand
);
303 if (Subtarget
.hasFCPSGN()) {
304 setOperationAction(ISD::FCOPYSIGN
, MVT::f64
, Legal
);
305 setOperationAction(ISD::FCOPYSIGN
, MVT::f32
, Legal
);
307 setOperationAction(ISD::FCOPYSIGN
, MVT::f64
, Expand
);
308 setOperationAction(ISD::FCOPYSIGN
, MVT::f32
, Expand
);
311 if (Subtarget
.hasFPRND()) {
312 setOperationAction(ISD::FFLOOR
, MVT::f64
, Legal
);
313 setOperationAction(ISD::FCEIL
, MVT::f64
, Legal
);
314 setOperationAction(ISD::FTRUNC
, MVT::f64
, Legal
);
315 setOperationAction(ISD::FROUND
, MVT::f64
, Legal
);
317 setOperationAction(ISD::FFLOOR
, MVT::f32
, Legal
);
318 setOperationAction(ISD::FCEIL
, MVT::f32
, Legal
);
319 setOperationAction(ISD::FTRUNC
, MVT::f32
, Legal
);
320 setOperationAction(ISD::FROUND
, MVT::f32
, Legal
);
323 // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
324 // to speed up scalar BSWAP64.
325 // CTPOP or CTTZ were introduced in P8/P9 respectively
326 setOperationAction(ISD::BSWAP
, MVT::i32
, Expand
);
327 if (Subtarget
.hasP9Vector())
328 setOperationAction(ISD::BSWAP
, MVT::i64
, Custom
);
330 setOperationAction(ISD::BSWAP
, MVT::i64
, Expand
);
331 if (Subtarget
.isISA3_0()) {
332 setOperationAction(ISD::CTTZ
, MVT::i32
, Legal
);
333 setOperationAction(ISD::CTTZ
, MVT::i64
, Legal
);
335 setOperationAction(ISD::CTTZ
, MVT::i32
, Expand
);
336 setOperationAction(ISD::CTTZ
, MVT::i64
, Expand
);
339 if (Subtarget
.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast
) {
340 setOperationAction(ISD::CTPOP
, MVT::i32
, Legal
);
341 setOperationAction(ISD::CTPOP
, MVT::i64
, Legal
);
343 setOperationAction(ISD::CTPOP
, MVT::i32
, Expand
);
344 setOperationAction(ISD::CTPOP
, MVT::i64
, Expand
);
347 // PowerPC does not have ROTR
348 setOperationAction(ISD::ROTR
, MVT::i32
, Expand
);
349 setOperationAction(ISD::ROTR
, MVT::i64
, Expand
);
351 if (!Subtarget
.useCRBits()) {
352 // PowerPC does not have Select
353 setOperationAction(ISD::SELECT
, MVT::i32
, Expand
);
354 setOperationAction(ISD::SELECT
, MVT::i64
, Expand
);
355 setOperationAction(ISD::SELECT
, MVT::f32
, Expand
);
356 setOperationAction(ISD::SELECT
, MVT::f64
, Expand
);
359 // PowerPC wants to turn select_cc of FP into fsel when possible.
360 setOperationAction(ISD::SELECT_CC
, MVT::f32
, Custom
);
361 setOperationAction(ISD::SELECT_CC
, MVT::f64
, Custom
);
363 // PowerPC wants to optimize integer setcc a bit
364 if (!Subtarget
.useCRBits())
365 setOperationAction(ISD::SETCC
, MVT::i32
, Custom
);
367 // PowerPC does not have BRCOND which requires SetCC
368 if (!Subtarget
.useCRBits())
369 setOperationAction(ISD::BRCOND
, MVT::Other
, Expand
);
371 setOperationAction(ISD::BR_JT
, MVT::Other
, Expand
);
373 if (Subtarget
.hasSPE()) {
374 // SPE has built-in conversions
375 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Legal
);
376 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Legal
);
377 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Legal
);
379 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
380 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Custom
);
382 // PowerPC does not have [U|S]INT_TO_FP
383 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Expand
);
384 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Expand
);
387 if (Subtarget
.hasDirectMove() && isPPC64
) {
388 setOperationAction(ISD::BITCAST
, MVT::f32
, Legal
);
389 setOperationAction(ISD::BITCAST
, MVT::i32
, Legal
);
390 setOperationAction(ISD::BITCAST
, MVT::i64
, Legal
);
391 setOperationAction(ISD::BITCAST
, MVT::f64
, Legal
);
393 setOperationAction(ISD::BITCAST
, MVT::f32
, Expand
);
394 setOperationAction(ISD::BITCAST
, MVT::i32
, Expand
);
395 setOperationAction(ISD::BITCAST
, MVT::i64
, Expand
);
396 setOperationAction(ISD::BITCAST
, MVT::f64
, Expand
);
399 // We cannot sextinreg(i1). Expand to shifts.
400 setOperationAction(ISD::SIGN_EXTEND_INREG
, MVT::i1
, Expand
);
402 // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
403 // SjLj exception handling but a light-weight setjmp/longjmp replacement to
404 // support continuation, user-level threading, and etc.. As a result, no
405 // other SjLj exception interfaces are implemented and please don't build
406 // your own exception handling based on them.
407 // LLVM/Clang supports zero-cost DWARF exception handling.
408 setOperationAction(ISD::EH_SJLJ_SETJMP
, MVT::i32
, Custom
);
409 setOperationAction(ISD::EH_SJLJ_LONGJMP
, MVT::Other
, Custom
);
411 // We want to legalize GlobalAddress and ConstantPool nodes into the
412 // appropriate instructions to materialize the address.
413 setOperationAction(ISD::GlobalAddress
, MVT::i32
, Custom
);
414 setOperationAction(ISD::GlobalTLSAddress
, MVT::i32
, Custom
);
415 setOperationAction(ISD::BlockAddress
, MVT::i32
, Custom
);
416 setOperationAction(ISD::ConstantPool
, MVT::i32
, Custom
);
417 setOperationAction(ISD::JumpTable
, MVT::i32
, Custom
);
418 setOperationAction(ISD::GlobalAddress
, MVT::i64
, Custom
);
419 setOperationAction(ISD::GlobalTLSAddress
, MVT::i64
, Custom
);
420 setOperationAction(ISD::BlockAddress
, MVT::i64
, Custom
);
421 setOperationAction(ISD::ConstantPool
, MVT::i64
, Custom
);
422 setOperationAction(ISD::JumpTable
, MVT::i64
, Custom
);
425 setOperationAction(ISD::TRAP
, MVT::Other
, Legal
);
427 // TRAMPOLINE is custom lowered.
428 setOperationAction(ISD::INIT_TRAMPOLINE
, MVT::Other
, Custom
);
429 setOperationAction(ISD::ADJUST_TRAMPOLINE
, MVT::Other
, Custom
);
431 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
432 setOperationAction(ISD::VASTART
, MVT::Other
, Custom
);
434 if (Subtarget
.is64BitELFABI()) {
435 // VAARG always uses double-word chunks, so promote anything smaller.
436 setOperationAction(ISD::VAARG
, MVT::i1
, Promote
);
437 AddPromotedToType(ISD::VAARG
, MVT::i1
, MVT::i64
);
438 setOperationAction(ISD::VAARG
, MVT::i8
, Promote
);
439 AddPromotedToType(ISD::VAARG
, MVT::i8
, MVT::i64
);
440 setOperationAction(ISD::VAARG
, MVT::i16
, Promote
);
441 AddPromotedToType(ISD::VAARG
, MVT::i16
, MVT::i64
);
442 setOperationAction(ISD::VAARG
, MVT::i32
, Promote
);
443 AddPromotedToType(ISD::VAARG
, MVT::i32
, MVT::i64
);
444 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
445 } else if (Subtarget
.is32BitELFABI()) {
446 // VAARG is custom lowered with the 32-bit SVR4 ABI.
447 setOperationAction(ISD::VAARG
, MVT::Other
, Custom
);
448 setOperationAction(ISD::VAARG
, MVT::i64
, Custom
);
450 setOperationAction(ISD::VAARG
, MVT::Other
, Expand
);
452 // VACOPY is custom lowered with the 32-bit SVR4 ABI.
453 if (Subtarget
.is32BitELFABI())
454 setOperationAction(ISD::VACOPY
, MVT::Other
, Custom
);
456 setOperationAction(ISD::VACOPY
, MVT::Other
, Expand
);
458 // Use the default implementation.
459 setOperationAction(ISD::VAEND
, MVT::Other
, Expand
);
460 setOperationAction(ISD::STACKSAVE
, MVT::Other
, Expand
);
461 setOperationAction(ISD::STACKRESTORE
, MVT::Other
, Custom
);
462 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i32
, Custom
);
463 setOperationAction(ISD::DYNAMIC_STACKALLOC
, MVT::i64
, Custom
);
464 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET
, MVT::i32
, Custom
);
465 setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET
, MVT::i64
, Custom
);
466 setOperationAction(ISD::EH_DWARF_CFA
, MVT::i32
, Custom
);
467 setOperationAction(ISD::EH_DWARF_CFA
, MVT::i64
, Custom
);
469 // We want to custom lower some of our intrinsics.
470 setOperationAction(ISD::INTRINSIC_WO_CHAIN
, MVT::Other
, Custom
);
472 // To handle counter-based loop conditions.
473 setOperationAction(ISD::INTRINSIC_W_CHAIN
, MVT::i1
, Custom
);
475 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i8
, Custom
);
476 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i16
, Custom
);
477 setOperationAction(ISD::INTRINSIC_VOID
, MVT::i32
, Custom
);
478 setOperationAction(ISD::INTRINSIC_VOID
, MVT::Other
, Custom
);
480 // Comparisons that require checking two conditions.
481 if (Subtarget
.hasSPE()) {
482 setCondCodeAction(ISD::SETO
, MVT::f32
, Expand
);
483 setCondCodeAction(ISD::SETO
, MVT::f64
, Expand
);
484 setCondCodeAction(ISD::SETUO
, MVT::f32
, Expand
);
485 setCondCodeAction(ISD::SETUO
, MVT::f64
, Expand
);
487 setCondCodeAction(ISD::SETULT
, MVT::f32
, Expand
);
488 setCondCodeAction(ISD::SETULT
, MVT::f64
, Expand
);
489 setCondCodeAction(ISD::SETUGT
, MVT::f32
, Expand
);
490 setCondCodeAction(ISD::SETUGT
, MVT::f64
, Expand
);
491 setCondCodeAction(ISD::SETUEQ
, MVT::f32
, Expand
);
492 setCondCodeAction(ISD::SETUEQ
, MVT::f64
, Expand
);
493 setCondCodeAction(ISD::SETOGE
, MVT::f32
, Expand
);
494 setCondCodeAction(ISD::SETOGE
, MVT::f64
, Expand
);
495 setCondCodeAction(ISD::SETOLE
, MVT::f32
, Expand
);
496 setCondCodeAction(ISD::SETOLE
, MVT::f64
, Expand
);
497 setCondCodeAction(ISD::SETONE
, MVT::f32
, Expand
);
498 setCondCodeAction(ISD::SETONE
, MVT::f64
, Expand
);
500 if (Subtarget
.has64BitSupport()) {
501 // They also have instructions for converting between i64 and fp.
502 setOperationAction(ISD::FP_TO_SINT
, MVT::i64
, Custom
);
503 setOperationAction(ISD::FP_TO_UINT
, MVT::i64
, Expand
);
504 setOperationAction(ISD::SINT_TO_FP
, MVT::i64
, Custom
);
505 setOperationAction(ISD::UINT_TO_FP
, MVT::i64
, Expand
);
506 // This is just the low 32 bits of a (signed) fp->i64 conversion.
507 // We cannot do this with Promote because i64 is not a legal type.
508 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Custom
);
510 if (Subtarget
.hasLFIWAX() || Subtarget
.isPPC64())
511 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Custom
);
513 // PowerPC does not have FP_TO_UINT on 32-bit implementations.
514 if (Subtarget
.hasSPE())
515 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Legal
);
517 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Expand
);
520 // With the instructions enabled under FPCVT, we can do everything.
521 if (Subtarget
.hasFPCVT()) {
522 if (Subtarget
.has64BitSupport()) {
523 setOperationAction(ISD::FP_TO_SINT
, MVT::i64
, Custom
);
524 setOperationAction(ISD::FP_TO_UINT
, MVT::i64
, Custom
);
525 setOperationAction(ISD::SINT_TO_FP
, MVT::i64
, Custom
);
526 setOperationAction(ISD::UINT_TO_FP
, MVT::i64
, Custom
);
529 setOperationAction(ISD::FP_TO_SINT
, MVT::i32
, Custom
);
530 setOperationAction(ISD::FP_TO_UINT
, MVT::i32
, Custom
);
531 setOperationAction(ISD::SINT_TO_FP
, MVT::i32
, Custom
);
532 setOperationAction(ISD::UINT_TO_FP
, MVT::i32
, Custom
);
535 if (Subtarget
.use64BitRegs()) {
536 // 64-bit PowerPC implementations can support i64 types directly
537 addRegisterClass(MVT::i64
, &PPC::G8RCRegClass
);
538 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
539 setOperationAction(ISD::BUILD_PAIR
, MVT::i64
, Expand
);
540 // 64-bit PowerPC wants to expand i128 shifts itself.
541 setOperationAction(ISD::SHL_PARTS
, MVT::i64
, Custom
);
542 setOperationAction(ISD::SRA_PARTS
, MVT::i64
, Custom
);
543 setOperationAction(ISD::SRL_PARTS
, MVT::i64
, Custom
);
545 // 32-bit PowerPC wants to expand i64 shifts itself.
546 setOperationAction(ISD::SHL_PARTS
, MVT::i32
, Custom
);
547 setOperationAction(ISD::SRA_PARTS
, MVT::i32
, Custom
);
548 setOperationAction(ISD::SRL_PARTS
, MVT::i32
, Custom
);
551 if (Subtarget
.hasAltivec()) {
552 // First set operation action for all vector types to expand. Then we
553 // will selectively turn on ones that can be effectively codegen'd.
554 for (MVT VT
: MVT::vector_valuetypes()) {
555 // add/sub are legal for all supported vector VT's.
556 setOperationAction(ISD::ADD
, VT
, Legal
);
557 setOperationAction(ISD::SUB
, VT
, Legal
);
559 // For v2i64, these are only valid with P8Vector. This is corrected after
561 if (VT
.getSizeInBits() <= 128 && VT
.getScalarSizeInBits() <= 64) {
562 setOperationAction(ISD::SMAX
, VT
, Legal
);
563 setOperationAction(ISD::SMIN
, VT
, Legal
);
564 setOperationAction(ISD::UMAX
, VT
, Legal
);
565 setOperationAction(ISD::UMIN
, VT
, Legal
);
568 setOperationAction(ISD::SMAX
, VT
, Expand
);
569 setOperationAction(ISD::SMIN
, VT
, Expand
);
570 setOperationAction(ISD::UMAX
, VT
, Expand
);
571 setOperationAction(ISD::UMIN
, VT
, Expand
);
574 if (Subtarget
.hasVSX()) {
575 setOperationAction(ISD::FMAXNUM
, VT
, Legal
);
576 setOperationAction(ISD::FMINNUM
, VT
, Legal
);
579 // Vector instructions introduced in P8
580 if (Subtarget
.hasP8Altivec() && (VT
.SimpleTy
!= MVT::v1i128
)) {
581 setOperationAction(ISD::CTPOP
, VT
, Legal
);
582 setOperationAction(ISD::CTLZ
, VT
, Legal
);
585 setOperationAction(ISD::CTPOP
, VT
, Expand
);
586 setOperationAction(ISD::CTLZ
, VT
, Expand
);
589 // Vector instructions introduced in P9
590 if (Subtarget
.hasP9Altivec() && (VT
.SimpleTy
!= MVT::v1i128
))
591 setOperationAction(ISD::CTTZ
, VT
, Legal
);
593 setOperationAction(ISD::CTTZ
, VT
, Expand
);
595 // We promote all shuffles to v16i8.
596 setOperationAction(ISD::VECTOR_SHUFFLE
, VT
, Promote
);
597 AddPromotedToType (ISD::VECTOR_SHUFFLE
, VT
, MVT::v16i8
);
599 // We promote all non-typed operations to v4i32.
600 setOperationAction(ISD::AND
, VT
, Promote
);
601 AddPromotedToType (ISD::AND
, VT
, MVT::v4i32
);
602 setOperationAction(ISD::OR
, VT
, Promote
);
603 AddPromotedToType (ISD::OR
, VT
, MVT::v4i32
);
604 setOperationAction(ISD::XOR
, VT
, Promote
);
605 AddPromotedToType (ISD::XOR
, VT
, MVT::v4i32
);
606 setOperationAction(ISD::LOAD
, VT
, Promote
);
607 AddPromotedToType (ISD::LOAD
, VT
, MVT::v4i32
);
608 setOperationAction(ISD::SELECT
, VT
, Promote
);
609 AddPromotedToType (ISD::SELECT
, VT
, MVT::v4i32
);
610 setOperationAction(ISD::VSELECT
, VT
, Legal
);
611 setOperationAction(ISD::SELECT_CC
, VT
, Promote
);
612 AddPromotedToType (ISD::SELECT_CC
, VT
, MVT::v4i32
);
613 setOperationAction(ISD::STORE
, VT
, Promote
);
614 AddPromotedToType (ISD::STORE
, VT
, MVT::v4i32
);
616 // No other operations are legal.
617 setOperationAction(ISD::MUL
, VT
, Expand
);
618 setOperationAction(ISD::SDIV
, VT
, Expand
);
619 setOperationAction(ISD::SREM
, VT
, Expand
);
620 setOperationAction(ISD::UDIV
, VT
, Expand
);
621 setOperationAction(ISD::UREM
, VT
, Expand
);
622 setOperationAction(ISD::FDIV
, VT
, Expand
);
623 setOperationAction(ISD::FREM
, VT
, Expand
);
624 setOperationAction(ISD::FNEG
, VT
, Expand
);
625 setOperationAction(ISD::FSQRT
, VT
, Expand
);
626 setOperationAction(ISD::FLOG
, VT
, Expand
);
627 setOperationAction(ISD::FLOG10
, VT
, Expand
);
628 setOperationAction(ISD::FLOG2
, VT
, Expand
);
629 setOperationAction(ISD::FEXP
, VT
, Expand
);
630 setOperationAction(ISD::FEXP2
, VT
, Expand
);
631 setOperationAction(ISD::FSIN
, VT
, Expand
);
632 setOperationAction(ISD::FCOS
, VT
, Expand
);
633 setOperationAction(ISD::FABS
, VT
, Expand
);
634 setOperationAction(ISD::FFLOOR
, VT
, Expand
);
635 setOperationAction(ISD::FCEIL
, VT
, Expand
);
636 setOperationAction(ISD::FTRUNC
, VT
, Expand
);
637 setOperationAction(ISD::FRINT
, VT
, Expand
);
638 setOperationAction(ISD::FNEARBYINT
, VT
, Expand
);
639 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, VT
, Expand
);
640 setOperationAction(ISD::INSERT_VECTOR_ELT
, VT
, Expand
);
641 setOperationAction(ISD::BUILD_VECTOR
, VT
, Expand
);
642 setOperationAction(ISD::MULHU
, VT
, Expand
);
643 setOperationAction(ISD::MULHS
, VT
, Expand
);
644 setOperationAction(ISD::UMUL_LOHI
, VT
, Expand
);
645 setOperationAction(ISD::SMUL_LOHI
, VT
, Expand
);
646 setOperationAction(ISD::UDIVREM
, VT
, Expand
);
647 setOperationAction(ISD::SDIVREM
, VT
, Expand
);
648 setOperationAction(ISD::SCALAR_TO_VECTOR
, VT
, Expand
);
649 setOperationAction(ISD::FPOW
, VT
, Expand
);
650 setOperationAction(ISD::BSWAP
, VT
, Expand
);
651 setOperationAction(ISD::SIGN_EXTEND_INREG
, VT
, Expand
);
652 setOperationAction(ISD::ROTL
, VT
, Expand
);
653 setOperationAction(ISD::ROTR
, VT
, Expand
);
655 for (MVT InnerVT
: MVT::vector_valuetypes()) {
656 setTruncStoreAction(VT
, InnerVT
, Expand
);
657 setLoadExtAction(ISD::SEXTLOAD
, VT
, InnerVT
, Expand
);
658 setLoadExtAction(ISD::ZEXTLOAD
, VT
, InnerVT
, Expand
);
659 setLoadExtAction(ISD::EXTLOAD
, VT
, InnerVT
, Expand
);
662 if (!Subtarget
.hasP8Vector()) {
663 setOperationAction(ISD::SMAX
, MVT::v2i64
, Expand
);
664 setOperationAction(ISD::SMIN
, MVT::v2i64
, Expand
);
665 setOperationAction(ISD::UMAX
, MVT::v2i64
, Expand
);
666 setOperationAction(ISD::UMIN
, MVT::v2i64
, Expand
);
669 for (auto VT
: {MVT::v2i64
, MVT::v4i32
, MVT::v8i16
, MVT::v16i8
})
670 setOperationAction(ISD::ABS
, VT
, Custom
);
672 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
673 // with merges, splats, etc.
674 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v16i8
, Custom
);
676 // Vector truncates to sub-word integer that fit in an Altivec/VSX register
677 // are cheap, so handle them before they get expanded to scalar.
678 setOperationAction(ISD::TRUNCATE
, MVT::v8i8
, Custom
);
679 setOperationAction(ISD::TRUNCATE
, MVT::v4i8
, Custom
);
680 setOperationAction(ISD::TRUNCATE
, MVT::v2i8
, Custom
);
681 setOperationAction(ISD::TRUNCATE
, MVT::v4i16
, Custom
);
682 setOperationAction(ISD::TRUNCATE
, MVT::v2i16
, Custom
);
684 setOperationAction(ISD::AND
, MVT::v4i32
, Legal
);
685 setOperationAction(ISD::OR
, MVT::v4i32
, Legal
);
686 setOperationAction(ISD::XOR
, MVT::v4i32
, Legal
);
687 setOperationAction(ISD::LOAD
, MVT::v4i32
, Legal
);
688 setOperationAction(ISD::SELECT
, MVT::v4i32
,
689 Subtarget
.useCRBits() ? Legal
: Expand
);
690 setOperationAction(ISD::STORE
, MVT::v4i32
, Legal
);
691 setOperationAction(ISD::FP_TO_SINT
, MVT::v4i32
, Legal
);
692 setOperationAction(ISD::FP_TO_UINT
, MVT::v4i32
, Legal
);
693 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i32
, Legal
);
694 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i32
, Legal
);
695 setOperationAction(ISD::FFLOOR
, MVT::v4f32
, Legal
);
696 setOperationAction(ISD::FCEIL
, MVT::v4f32
, Legal
);
697 setOperationAction(ISD::FTRUNC
, MVT::v4f32
, Legal
);
698 setOperationAction(ISD::FNEARBYINT
, MVT::v4f32
, Legal
);
700 // Without hasP8Altivec set, v2i64 SMAX isn't available.
701 // But ABS custom lowering requires SMAX support.
702 if (!Subtarget
.hasP8Altivec())
703 setOperationAction(ISD::ABS
, MVT::v2i64
, Expand
);
705 addRegisterClass(MVT::v4f32
, &PPC::VRRCRegClass
);
706 addRegisterClass(MVT::v4i32
, &PPC::VRRCRegClass
);
707 addRegisterClass(MVT::v8i16
, &PPC::VRRCRegClass
);
708 addRegisterClass(MVT::v16i8
, &PPC::VRRCRegClass
);
710 setOperationAction(ISD::MUL
, MVT::v4f32
, Legal
);
711 setOperationAction(ISD::FMA
, MVT::v4f32
, Legal
);
713 if (TM
.Options
.UnsafeFPMath
|| Subtarget
.hasVSX()) {
714 setOperationAction(ISD::FDIV
, MVT::v4f32
, Legal
);
715 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Legal
);
718 if (Subtarget
.hasP8Altivec())
719 setOperationAction(ISD::MUL
, MVT::v4i32
, Legal
);
721 setOperationAction(ISD::MUL
, MVT::v4i32
, Custom
);
723 setOperationAction(ISD::MUL
, MVT::v8i16
, Custom
);
724 setOperationAction(ISD::MUL
, MVT::v16i8
, Custom
);
726 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Custom
);
727 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i32
, Custom
);
729 setOperationAction(ISD::BUILD_VECTOR
, MVT::v16i8
, Custom
);
730 setOperationAction(ISD::BUILD_VECTOR
, MVT::v8i16
, Custom
);
731 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4i32
, Custom
);
732 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f32
, Custom
);
734 // Altivec does not contain unordered floating-point compare instructions
735 setCondCodeAction(ISD::SETUO
, MVT::v4f32
, Expand
);
736 setCondCodeAction(ISD::SETUEQ
, MVT::v4f32
, Expand
);
737 setCondCodeAction(ISD::SETO
, MVT::v4f32
, Expand
);
738 setCondCodeAction(ISD::SETONE
, MVT::v4f32
, Expand
);
740 if (Subtarget
.hasVSX()) {
741 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v2f64
, Legal
);
742 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2f64
, Legal
);
743 if (Subtarget
.hasP8Vector()) {
744 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Legal
);
745 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f32
, Legal
);
747 if (Subtarget
.hasDirectMove() && isPPC64
) {
748 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v16i8
, Legal
);
749 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v8i16
, Legal
);
750 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i32
, Legal
);
751 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v2i64
, Legal
);
752 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v16i8
, Legal
);
753 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v8i16
, Legal
);
754 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4i32
, Legal
);
755 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2i64
, Legal
);
757 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v2f64
, Legal
);
759 setOperationAction(ISD::FFLOOR
, MVT::v2f64
, Legal
);
760 setOperationAction(ISD::FCEIL
, MVT::v2f64
, Legal
);
761 setOperationAction(ISD::FTRUNC
, MVT::v2f64
, Legal
);
762 setOperationAction(ISD::FNEARBYINT
, MVT::v2f64
, Legal
);
763 setOperationAction(ISD::FROUND
, MVT::v2f64
, Legal
);
765 setOperationAction(ISD::FROUND
, MVT::v4f32
, Legal
);
767 setOperationAction(ISD::MUL
, MVT::v2f64
, Legal
);
768 setOperationAction(ISD::FMA
, MVT::v2f64
, Legal
);
770 setOperationAction(ISD::FDIV
, MVT::v2f64
, Legal
);
771 setOperationAction(ISD::FSQRT
, MVT::v2f64
, Legal
);
773 // Share the Altivec comparison restrictions.
774 setCondCodeAction(ISD::SETUO
, MVT::v2f64
, Expand
);
775 setCondCodeAction(ISD::SETUEQ
, MVT::v2f64
, Expand
);
776 setCondCodeAction(ISD::SETO
, MVT::v2f64
, Expand
);
777 setCondCodeAction(ISD::SETONE
, MVT::v2f64
, Expand
);
779 setOperationAction(ISD::LOAD
, MVT::v2f64
, Legal
);
780 setOperationAction(ISD::STORE
, MVT::v2f64
, Legal
);
782 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v2f64
, Legal
);
784 if (Subtarget
.hasP8Vector())
785 addRegisterClass(MVT::f32
, &PPC::VSSRCRegClass
);
787 addRegisterClass(MVT::f64
, &PPC::VSFRCRegClass
);
789 addRegisterClass(MVT::v4i32
, &PPC::VSRCRegClass
);
790 addRegisterClass(MVT::v4f32
, &PPC::VSRCRegClass
);
791 addRegisterClass(MVT::v2f64
, &PPC::VSRCRegClass
);
793 if (Subtarget
.hasP8Altivec()) {
794 setOperationAction(ISD::SHL
, MVT::v2i64
, Legal
);
795 setOperationAction(ISD::SRA
, MVT::v2i64
, Legal
);
796 setOperationAction(ISD::SRL
, MVT::v2i64
, Legal
);
798 // 128 bit shifts can be accomplished via 3 instructions for SHL and
799 // SRL, but not for SRA because of the instructions available:
800 // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
802 setOperationAction(ISD::SHL
, MVT::v1i128
, Expand
);
803 setOperationAction(ISD::SRL
, MVT::v1i128
, Expand
);
804 setOperationAction(ISD::SRA
, MVT::v1i128
, Expand
);
806 setOperationAction(ISD::SETCC
, MVT::v2i64
, Legal
);
809 setOperationAction(ISD::SHL
, MVT::v2i64
, Expand
);
810 setOperationAction(ISD::SRA
, MVT::v2i64
, Expand
);
811 setOperationAction(ISD::SRL
, MVT::v2i64
, Expand
);
813 setOperationAction(ISD::SETCC
, MVT::v2i64
, Custom
);
815 // VSX v2i64 only supports non-arithmetic operations.
816 setOperationAction(ISD::ADD
, MVT::v2i64
, Expand
);
817 setOperationAction(ISD::SUB
, MVT::v2i64
, Expand
);
820 setOperationAction(ISD::LOAD
, MVT::v2i64
, Promote
);
821 AddPromotedToType (ISD::LOAD
, MVT::v2i64
, MVT::v2f64
);
822 setOperationAction(ISD::STORE
, MVT::v2i64
, Promote
);
823 AddPromotedToType (ISD::STORE
, MVT::v2i64
, MVT::v2f64
);
825 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v2i64
, Legal
);
827 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i64
, Legal
);
828 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i64
, Legal
);
829 setOperationAction(ISD::FP_TO_SINT
, MVT::v2i64
, Legal
);
830 setOperationAction(ISD::FP_TO_UINT
, MVT::v2i64
, Legal
);
832 // Custom handling for partial vectors of integers converted to
833 // floating point. We already have optimal handling for v2i32 through
834 // the DAG combine, so those aren't necessary.
835 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i8
, Custom
);
836 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i8
, Custom
);
837 setOperationAction(ISD::UINT_TO_FP
, MVT::v2i16
, Custom
);
838 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i16
, Custom
);
839 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i8
, Custom
);
840 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i8
, Custom
);
841 setOperationAction(ISD::SINT_TO_FP
, MVT::v2i16
, Custom
);
842 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i16
, Custom
);
844 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
845 setOperationAction(ISD::FNEG
, MVT::v2f64
, Legal
);
846 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
847 setOperationAction(ISD::FABS
, MVT::v2f64
, Legal
);
848 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f32
, Legal
);
849 setOperationAction(ISD::FCOPYSIGN
, MVT::v2f64
, Legal
);
851 if (Subtarget
.hasDirectMove())
852 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2i64
, Custom
);
853 setOperationAction(ISD::BUILD_VECTOR
, MVT::v2f64
, Custom
);
855 addRegisterClass(MVT::v2i64
, &PPC::VSRCRegClass
);
858 if (Subtarget
.hasP8Altivec()) {
859 addRegisterClass(MVT::v2i64
, &PPC::VRRCRegClass
);
860 addRegisterClass(MVT::v1i128
, &PPC::VRRCRegClass
);
863 if (Subtarget
.hasP9Vector()) {
864 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4i32
, Custom
);
865 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f32
, Custom
);
867 // 128 bit shifts can be accomplished via 3 instructions for SHL and
868 // SRL, but not for SRA because of the instructions available:
869 // VS{RL} and VS{RL}O.
870 setOperationAction(ISD::SHL
, MVT::v1i128
, Legal
);
871 setOperationAction(ISD::SRL
, MVT::v1i128
, Legal
);
872 setOperationAction(ISD::SRA
, MVT::v1i128
, Expand
);
874 if (EnableQuadPrecision
) {
875 addRegisterClass(MVT::f128
, &PPC::VRRCRegClass
);
876 setOperationAction(ISD::FADD
, MVT::f128
, Legal
);
877 setOperationAction(ISD::FSUB
, MVT::f128
, Legal
);
878 setOperationAction(ISD::FDIV
, MVT::f128
, Legal
);
879 setOperationAction(ISD::FMUL
, MVT::f128
, Legal
);
880 setOperationAction(ISD::FP_EXTEND
, MVT::f128
, Legal
);
881 // No extending loads to f128 on PPC.
882 for (MVT FPT
: MVT::fp_valuetypes())
883 setLoadExtAction(ISD::EXTLOAD
, MVT::f128
, FPT
, Expand
);
884 setOperationAction(ISD::FMA
, MVT::f128
, Legal
);
885 setCondCodeAction(ISD::SETULT
, MVT::f128
, Expand
);
886 setCondCodeAction(ISD::SETUGT
, MVT::f128
, Expand
);
887 setCondCodeAction(ISD::SETUEQ
, MVT::f128
, Expand
);
888 setCondCodeAction(ISD::SETOGE
, MVT::f128
, Expand
);
889 setCondCodeAction(ISD::SETOLE
, MVT::f128
, Expand
);
890 setCondCodeAction(ISD::SETONE
, MVT::f128
, Expand
);
892 setOperationAction(ISD::FTRUNC
, MVT::f128
, Legal
);
893 setOperationAction(ISD::FRINT
, MVT::f128
, Legal
);
894 setOperationAction(ISD::FFLOOR
, MVT::f128
, Legal
);
895 setOperationAction(ISD::FCEIL
, MVT::f128
, Legal
);
896 setOperationAction(ISD::FNEARBYINT
, MVT::f128
, Legal
);
897 setOperationAction(ISD::FROUND
, MVT::f128
, Legal
);
899 setOperationAction(ISD::SELECT
, MVT::f128
, Expand
);
900 setOperationAction(ISD::FP_ROUND
, MVT::f64
, Legal
);
901 setOperationAction(ISD::FP_ROUND
, MVT::f32
, Legal
);
902 setTruncStoreAction(MVT::f128
, MVT::f64
, Expand
);
903 setTruncStoreAction(MVT::f128
, MVT::f32
, Expand
);
904 setOperationAction(ISD::BITCAST
, MVT::i128
, Custom
);
905 // No implementation for these ops for PowerPC.
906 setOperationAction(ISD::FSIN
, MVT::f128
, Expand
);
907 setOperationAction(ISD::FCOS
, MVT::f128
, Expand
);
908 setOperationAction(ISD::FPOW
, MVT::f128
, Expand
);
909 setOperationAction(ISD::FPOWI
, MVT::f128
, Expand
);
910 setOperationAction(ISD::FREM
, MVT::f128
, Expand
);
912 setOperationAction(ISD::FP_EXTEND
, MVT::v2f32
, Custom
);
916 if (Subtarget
.hasP9Altivec()) {
917 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v8i16
, Custom
);
918 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v16i8
, Custom
);
922 if (Subtarget
.hasQPX()) {
923 setOperationAction(ISD::FADD
, MVT::v4f64
, Legal
);
924 setOperationAction(ISD::FSUB
, MVT::v4f64
, Legal
);
925 setOperationAction(ISD::FMUL
, MVT::v4f64
, Legal
);
926 setOperationAction(ISD::FREM
, MVT::v4f64
, Expand
);
928 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f64
, Legal
);
929 setOperationAction(ISD::FGETSIGN
, MVT::v4f64
, Expand
);
931 setOperationAction(ISD::LOAD
, MVT::v4f64
, Custom
);
932 setOperationAction(ISD::STORE
, MVT::v4f64
, Custom
);
934 setTruncStoreAction(MVT::v4f64
, MVT::v4f32
, Custom
);
935 setLoadExtAction(ISD::EXTLOAD
, MVT::v4f64
, MVT::v4f32
, Custom
);
937 if (!Subtarget
.useCRBits())
938 setOperationAction(ISD::SELECT
, MVT::v4f64
, Expand
);
939 setOperationAction(ISD::VSELECT
, MVT::v4f64
, Legal
);
941 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f64
, Legal
);
942 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f64
, Expand
);
943 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4f64
, Expand
);
944 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4f64
, Expand
);
945 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4f64
, Custom
);
946 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f64
, Legal
);
947 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f64
, Custom
);
949 setOperationAction(ISD::FP_TO_SINT
, MVT::v4f64
, Legal
);
950 setOperationAction(ISD::FP_TO_UINT
, MVT::v4f64
, Expand
);
952 setOperationAction(ISD::FP_ROUND
, MVT::v4f32
, Legal
);
953 setOperationAction(ISD::FP_ROUND_INREG
, MVT::v4f32
, Expand
);
954 setOperationAction(ISD::FP_EXTEND
, MVT::v4f64
, Legal
);
956 setOperationAction(ISD::FNEG
, MVT::v4f64
, Legal
);
957 setOperationAction(ISD::FABS
, MVT::v4f64
, Legal
);
958 setOperationAction(ISD::FSIN
, MVT::v4f64
, Expand
);
959 setOperationAction(ISD::FCOS
, MVT::v4f64
, Expand
);
960 setOperationAction(ISD::FPOW
, MVT::v4f64
, Expand
);
961 setOperationAction(ISD::FLOG
, MVT::v4f64
, Expand
);
962 setOperationAction(ISD::FLOG2
, MVT::v4f64
, Expand
);
963 setOperationAction(ISD::FLOG10
, MVT::v4f64
, Expand
);
964 setOperationAction(ISD::FEXP
, MVT::v4f64
, Expand
);
965 setOperationAction(ISD::FEXP2
, MVT::v4f64
, Expand
);
967 setOperationAction(ISD::FMINNUM
, MVT::v4f64
, Legal
);
968 setOperationAction(ISD::FMAXNUM
, MVT::v4f64
, Legal
);
970 setIndexedLoadAction(ISD::PRE_INC
, MVT::v4f64
, Legal
);
971 setIndexedStoreAction(ISD::PRE_INC
, MVT::v4f64
, Legal
);
973 addRegisterClass(MVT::v4f64
, &PPC::QFRCRegClass
);
975 setOperationAction(ISD::FADD
, MVT::v4f32
, Legal
);
976 setOperationAction(ISD::FSUB
, MVT::v4f32
, Legal
);
977 setOperationAction(ISD::FMUL
, MVT::v4f32
, Legal
);
978 setOperationAction(ISD::FREM
, MVT::v4f32
, Expand
);
980 setOperationAction(ISD::FCOPYSIGN
, MVT::v4f32
, Legal
);
981 setOperationAction(ISD::FGETSIGN
, MVT::v4f32
, Expand
);
983 setOperationAction(ISD::LOAD
, MVT::v4f32
, Custom
);
984 setOperationAction(ISD::STORE
, MVT::v4f32
, Custom
);
986 if (!Subtarget
.useCRBits())
987 setOperationAction(ISD::SELECT
, MVT::v4f32
, Expand
);
988 setOperationAction(ISD::VSELECT
, MVT::v4f32
, Legal
);
990 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4f32
, Legal
);
991 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4f32
, Expand
);
992 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4f32
, Expand
);
993 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4f32
, Expand
);
994 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4f32
, Custom
);
995 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4f32
, Legal
);
996 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4f32
, Custom
);
998 setOperationAction(ISD::FP_TO_SINT
, MVT::v4f32
, Legal
);
999 setOperationAction(ISD::FP_TO_UINT
, MVT::v4f32
, Expand
);
1001 setOperationAction(ISD::FNEG
, MVT::v4f32
, Legal
);
1002 setOperationAction(ISD::FABS
, MVT::v4f32
, Legal
);
1003 setOperationAction(ISD::FSIN
, MVT::v4f32
, Expand
);
1004 setOperationAction(ISD::FCOS
, MVT::v4f32
, Expand
);
1005 setOperationAction(ISD::FPOW
, MVT::v4f32
, Expand
);
1006 setOperationAction(ISD::FLOG
, MVT::v4f32
, Expand
);
1007 setOperationAction(ISD::FLOG2
, MVT::v4f32
, Expand
);
1008 setOperationAction(ISD::FLOG10
, MVT::v4f32
, Expand
);
1009 setOperationAction(ISD::FEXP
, MVT::v4f32
, Expand
);
1010 setOperationAction(ISD::FEXP2
, MVT::v4f32
, Expand
);
1012 setOperationAction(ISD::FMINNUM
, MVT::v4f32
, Legal
);
1013 setOperationAction(ISD::FMAXNUM
, MVT::v4f32
, Legal
);
1015 setIndexedLoadAction(ISD::PRE_INC
, MVT::v4f32
, Legal
);
1016 setIndexedStoreAction(ISD::PRE_INC
, MVT::v4f32
, Legal
);
1018 addRegisterClass(MVT::v4f32
, &PPC::QSRCRegClass
);
1020 setOperationAction(ISD::AND
, MVT::v4i1
, Legal
);
1021 setOperationAction(ISD::OR
, MVT::v4i1
, Legal
);
1022 setOperationAction(ISD::XOR
, MVT::v4i1
, Legal
);
1024 if (!Subtarget
.useCRBits())
1025 setOperationAction(ISD::SELECT
, MVT::v4i1
, Expand
);
1026 setOperationAction(ISD::VSELECT
, MVT::v4i1
, Legal
);
1028 setOperationAction(ISD::LOAD
, MVT::v4i1
, Custom
);
1029 setOperationAction(ISD::STORE
, MVT::v4i1
, Custom
);
1031 setOperationAction(ISD::EXTRACT_VECTOR_ELT
, MVT::v4i1
, Custom
);
1032 setOperationAction(ISD::INSERT_VECTOR_ELT
, MVT::v4i1
, Expand
);
1033 setOperationAction(ISD::CONCAT_VECTORS
, MVT::v4i1
, Expand
);
1034 setOperationAction(ISD::EXTRACT_SUBVECTOR
, MVT::v4i1
, Expand
);
1035 setOperationAction(ISD::VECTOR_SHUFFLE
, MVT::v4i1
, Custom
);
1036 setOperationAction(ISD::SCALAR_TO_VECTOR
, MVT::v4i1
, Expand
);
1037 setOperationAction(ISD::BUILD_VECTOR
, MVT::v4i1
, Custom
);
1039 setOperationAction(ISD::SINT_TO_FP
, MVT::v4i1
, Custom
);
1040 setOperationAction(ISD::UINT_TO_FP
, MVT::v4i1
, Custom
);
1042 addRegisterClass(MVT::v4i1
, &PPC::QBRCRegClass
);
1044 setOperationAction(ISD::FFLOOR
, MVT::v4f64
, Legal
);
1045 setOperationAction(ISD::FCEIL
, MVT::v4f64
, Legal
);
1046 setOperationAction(ISD::FTRUNC
, MVT::v4f64
, Legal
);
1047 setOperationAction(ISD::FROUND
, MVT::v4f64
, Legal
);
1049 setOperationAction(ISD::FFLOOR
, MVT::v4f32
, Legal
);
1050 setOperationAction(ISD::FCEIL
, MVT::v4f32
, Legal
);
1051 setOperationAction(ISD::FTRUNC
, MVT::v4f32
, Legal
);
1052 setOperationAction(ISD::FROUND
, MVT::v4f32
, Legal
);
1054 setOperationAction(ISD::FNEARBYINT
, MVT::v4f64
, Expand
);
1055 setOperationAction(ISD::FNEARBYINT
, MVT::v4f32
, Expand
);
1057 // These need to set FE_INEXACT, and so cannot be vectorized here.
1058 setOperationAction(ISD::FRINT
, MVT::v4f64
, Expand
);
1059 setOperationAction(ISD::FRINT
, MVT::v4f32
, Expand
);
1061 if (TM
.Options
.UnsafeFPMath
) {
1062 setOperationAction(ISD::FDIV
, MVT::v4f64
, Legal
);
1063 setOperationAction(ISD::FSQRT
, MVT::v4f64
, Legal
);
1065 setOperationAction(ISD::FDIV
, MVT::v4f32
, Legal
);
1066 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Legal
);
1068 setOperationAction(ISD::FDIV
, MVT::v4f64
, Expand
);
1069 setOperationAction(ISD::FSQRT
, MVT::v4f64
, Expand
);
1071 setOperationAction(ISD::FDIV
, MVT::v4f32
, Expand
);
1072 setOperationAction(ISD::FSQRT
, MVT::v4f32
, Expand
);
1076 if (Subtarget
.has64BitSupport())
1077 setOperationAction(ISD::PREFETCH
, MVT::Other
, Legal
);
1079 setOperationAction(ISD::READCYCLECOUNTER
, MVT::i64
, isPPC64
? Legal
: Custom
);
1082 setOperationAction(ISD::ATOMIC_LOAD
, MVT::i64
, Expand
);
1083 setOperationAction(ISD::ATOMIC_STORE
, MVT::i64
, Expand
);
1086 setBooleanContents(ZeroOrOneBooleanContent
);
1088 if (Subtarget
.hasAltivec()) {
1089 // Altivec instructions set fields to all zeros or all ones.
1090 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent
);
1094 // These libcalls are not available in 32-bit.
1095 setLibcallName(RTLIB::SHL_I128
, nullptr);
1096 setLibcallName(RTLIB::SRL_I128
, nullptr);
1097 setLibcallName(RTLIB::SRA_I128
, nullptr);
1100 setStackPointerRegisterToSaveRestore(isPPC64
? PPC::X1
: PPC::R1
);
1102 // We have target-specific dag combine patterns for the following nodes:
1103 setTargetDAGCombine(ISD::ADD
);
1104 setTargetDAGCombine(ISD::SHL
);
1105 setTargetDAGCombine(ISD::SRA
);
1106 setTargetDAGCombine(ISD::SRL
);
1107 setTargetDAGCombine(ISD::MUL
);
1108 setTargetDAGCombine(ISD::SINT_TO_FP
);
1109 setTargetDAGCombine(ISD::BUILD_VECTOR
);
1110 if (Subtarget
.hasFPCVT())
1111 setTargetDAGCombine(ISD::UINT_TO_FP
);
1112 setTargetDAGCombine(ISD::LOAD
);
1113 setTargetDAGCombine(ISD::STORE
);
1114 setTargetDAGCombine(ISD::BR_CC
);
1115 if (Subtarget
.useCRBits())
1116 setTargetDAGCombine(ISD::BRCOND
);
1117 setTargetDAGCombine(ISD::BSWAP
);
1118 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN
);
1119 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN
);
1120 setTargetDAGCombine(ISD::INTRINSIC_VOID
);
1122 setTargetDAGCombine(ISD::SIGN_EXTEND
);
1123 setTargetDAGCombine(ISD::ZERO_EXTEND
);
1124 setTargetDAGCombine(ISD::ANY_EXTEND
);
1126 setTargetDAGCombine(ISD::TRUNCATE
);
1127 setTargetDAGCombine(ISD::VECTOR_SHUFFLE
);
1130 if (Subtarget
.useCRBits()) {
1131 setTargetDAGCombine(ISD::TRUNCATE
);
1132 setTargetDAGCombine(ISD::SETCC
);
1133 setTargetDAGCombine(ISD::SELECT_CC
);
1136 // Use reciprocal estimates.
1137 if (TM
.Options
.UnsafeFPMath
) {
1138 setTargetDAGCombine(ISD::FDIV
);
1139 setTargetDAGCombine(ISD::FSQRT
);
1142 if (Subtarget
.hasP9Altivec()) {
1143 setTargetDAGCombine(ISD::ABS
);
1144 setTargetDAGCombine(ISD::VSELECT
);
1147 // Darwin long double math library functions have $LDBL128 appended.
1148 if (Subtarget
.isDarwin()) {
1149 setLibcallName(RTLIB::COS_PPCF128
, "cosl$LDBL128");
1150 setLibcallName(RTLIB::POW_PPCF128
, "powl$LDBL128");
1151 setLibcallName(RTLIB::REM_PPCF128
, "fmodl$LDBL128");
1152 setLibcallName(RTLIB::SIN_PPCF128
, "sinl$LDBL128");
1153 setLibcallName(RTLIB::SQRT_PPCF128
, "sqrtl$LDBL128");
1154 setLibcallName(RTLIB::LOG_PPCF128
, "logl$LDBL128");
1155 setLibcallName(RTLIB::LOG2_PPCF128
, "log2l$LDBL128");
1156 setLibcallName(RTLIB::LOG10_PPCF128
, "log10l$LDBL128");
1157 setLibcallName(RTLIB::EXP_PPCF128
, "expl$LDBL128");
1158 setLibcallName(RTLIB::EXP2_PPCF128
, "exp2l$LDBL128");
1161 if (EnableQuadPrecision
) {
1162 setLibcallName(RTLIB::LOG_F128
, "logf128");
1163 setLibcallName(RTLIB::LOG2_F128
, "log2f128");
1164 setLibcallName(RTLIB::LOG10_F128
, "log10f128");
1165 setLibcallName(RTLIB::EXP_F128
, "expf128");
1166 setLibcallName(RTLIB::EXP2_F128
, "exp2f128");
1167 setLibcallName(RTLIB::SIN_F128
, "sinf128");
1168 setLibcallName(RTLIB::COS_F128
, "cosf128");
1169 setLibcallName(RTLIB::POW_F128
, "powf128");
1170 setLibcallName(RTLIB::FMIN_F128
, "fminf128");
1171 setLibcallName(RTLIB::FMAX_F128
, "fmaxf128");
1172 setLibcallName(RTLIB::POWI_F128
, "__powikf2");
1173 setLibcallName(RTLIB::REM_F128
, "fmodf128");
1176 // With 32 condition bits, we don't need to sink (and duplicate) compares
1177 // aggressively in CodeGenPrep.
1178 if (Subtarget
.useCRBits()) {
1179 setHasMultipleConditionRegisters();
1180 setJumpIsExpensive();
1183 setMinFunctionAlignment(llvm::Align(4));
1184 if (Subtarget
.isDarwin())
1185 setPrefFunctionLogAlignment(4);
1187 switch (Subtarget
.getDarwinDirective()) {
1192 case PPC::DIR_E500mc
:
1193 case PPC::DIR_E5500
:
1196 case PPC::DIR_PWR5X
:
1198 case PPC::DIR_PWR6X
:
1202 setPrefFunctionLogAlignment(4);
1203 setPrefLoopLogAlignment(4);
1207 if (Subtarget
.enableMachineScheduler())
1208 setSchedulingPreference(Sched::Source
);
1210 setSchedulingPreference(Sched::Hybrid
);
1212 computeRegisterProperties(STI
.getRegisterInfo());
1214 // The Freescale cores do better with aggressive inlining of memcpy and
1215 // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1216 if (Subtarget
.getDarwinDirective() == PPC::DIR_E500mc
||
1217 Subtarget
.getDarwinDirective() == PPC::DIR_E5500
) {
1218 MaxStoresPerMemset
= 32;
1219 MaxStoresPerMemsetOptSize
= 16;
1220 MaxStoresPerMemcpy
= 32;
1221 MaxStoresPerMemcpyOptSize
= 8;
1222 MaxStoresPerMemmove
= 32;
1223 MaxStoresPerMemmoveOptSize
= 8;
1224 } else if (Subtarget
.getDarwinDirective() == PPC::DIR_A2
) {
1225 // The A2 also benefits from (very) aggressive inlining of memcpy and
1226 // friends. The overhead of the function call, even when warm, can be
1227 // over one hundred cycles.
1228 MaxStoresPerMemset
= 128;
1229 MaxStoresPerMemcpy
= 128;
1230 MaxStoresPerMemmove
= 128;
1231 MaxLoadsPerMemcmp
= 128;
1233 MaxLoadsPerMemcmp
= 8;
1234 MaxLoadsPerMemcmpOptSize
= 4;
1238 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1239 /// the desired ByVal argument alignment.
///
/// \param Ty          Type to inspect. Vector, array and struct types are
///                    handled; arrays and structs are walked recursively.
/// \param MaxAlign    In/out running alignment. For arrays and structs it is
///                    replaced by an element's alignment whenever that is
///                    larger.
/// \param MaxMaxAlign Cap on the alignment of interest; MaxAlign is compared
///                    against it before and during the walk.
1240 static void getMaxByValAlign(Type
*Ty
, unsigned &MaxAlign
,
1241 unsigned MaxMaxAlign
) {
// Check whether the running alignment has already reached the cap.
1242 if (MaxAlign
== MaxMaxAlign
)
// Vector types are classified by total bit width: >= 256 bits when the cap
// allows 32, otherwise >= 128 bits while MaxAlign is still below 16.
1244 if (VectorType
*VTy
= dyn_cast
<VectorType
>(Ty
)) {
1245 if (MaxMaxAlign
>= 32 && VTy
->getBitWidth() >= 256)
1247 else if (VTy
->getBitWidth() >= 128 && MaxAlign
< 16)
1249 } else if (ArrayType
*ATy
= dyn_cast
<ArrayType
>(Ty
)) {
// Arrays: compute the element type's alignment starting from 0 and keep it
// only if it exceeds the running value.
1250 unsigned EltAlign
= 0;
1251 getMaxByValAlign(ATy
->getElementType(), EltAlign
, MaxMaxAlign
);
1252 if (EltAlign
> MaxAlign
)
1253 MaxAlign
= EltAlign
;
1254 } else if (StructType
*STy
= dyn_cast
<StructType
>(Ty
)) {
// Structs: do the same for every member, keeping the maximum.
1255 for (auto *EltTy
: STy
->elements()) {
1256 unsigned EltAlign
= 0;
1257 getMaxByValAlign(EltTy
, EltAlign
, MaxMaxAlign
);
1258 if (EltAlign
> MaxAlign
)
1259 MaxAlign
= EltAlign
;
// Re-test against the cap after each member.
1260 if (MaxAlign
== MaxMaxAlign
)
1266 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1267 /// function arguments in the caller parameter area.
///
/// \param Ty  Type of the ByVal argument; it is handed to getMaxByValAlign
///            when the subtarget has Altivec or QPX.
/// \param DL  Data layout of the module.
1268 unsigned PPCTargetLowering::getByValTypeAlignment(Type
*Ty
,
1269 const DataLayout
&DL
) const {
1270 // Darwin passes everything on 4 byte boundary.
1271 if (Subtarget
.isDarwin())
1274 // 16byte and wider vectors are passed on 16byte boundary.
1275 // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1276 unsigned Align
= Subtarget
.isPPC64() ? 8 : 4;
// With Altivec or QPX, let getMaxByValAlign update Align for Ty; the cap
// passed to it is 32 with QPX and 16 otherwise.
1277 if (Subtarget
.hasAltivec() || Subtarget
.hasQPX())
1278 getMaxByValAlign(Ty
, Align
, Subtarget
.hasQPX() ? 32 : 16);
1282 bool PPCTargetLowering::useSoftFloat() const {
1283 return Subtarget
.useSoftFloat();
1286 bool PPCTargetLowering::hasSPE() const {
1287 return Subtarget
.hasSPE();
1290 bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT
) const {
1291 return VT
.isScalarInteger();
1294 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode
) const {
1295 switch ((PPCISD::NodeType
)Opcode
) {
1296 case PPCISD::FIRST_NUMBER
: break;
1297 case PPCISD::FSEL
: return "PPCISD::FSEL";
1298 case PPCISD::FCFID
: return "PPCISD::FCFID";
1299 case PPCISD::FCFIDU
: return "PPCISD::FCFIDU";
1300 case PPCISD::FCFIDS
: return "PPCISD::FCFIDS";
1301 case PPCISD::FCFIDUS
: return "PPCISD::FCFIDUS";
1302 case PPCISD::FCTIDZ
: return "PPCISD::FCTIDZ";
1303 case PPCISD::FCTIWZ
: return "PPCISD::FCTIWZ";
1304 case PPCISD::FCTIDUZ
: return "PPCISD::FCTIDUZ";
1305 case PPCISD::FCTIWUZ
: return "PPCISD::FCTIWUZ";
1306 case PPCISD::FP_TO_UINT_IN_VSR
:
1307 return "PPCISD::FP_TO_UINT_IN_VSR,";
1308 case PPCISD::FP_TO_SINT_IN_VSR
:
1309 return "PPCISD::FP_TO_SINT_IN_VSR";
1310 case PPCISD::FRE
: return "PPCISD::FRE";
1311 case PPCISD::FRSQRTE
: return "PPCISD::FRSQRTE";
1312 case PPCISD::STFIWX
: return "PPCISD::STFIWX";
1313 case PPCISD::VMADDFP
: return "PPCISD::VMADDFP";
1314 case PPCISD::VNMSUBFP
: return "PPCISD::VNMSUBFP";
1315 case PPCISD::VPERM
: return "PPCISD::VPERM";
1316 case PPCISD::XXSPLT
: return "PPCISD::XXSPLT";
1317 case PPCISD::VECINSERT
: return "PPCISD::VECINSERT";
1318 case PPCISD::XXREVERSE
: return "PPCISD::XXREVERSE";
1319 case PPCISD::XXPERMDI
: return "PPCISD::XXPERMDI";
1320 case PPCISD::VECSHL
: return "PPCISD::VECSHL";
1321 case PPCISD::CMPB
: return "PPCISD::CMPB";
1322 case PPCISD::Hi
: return "PPCISD::Hi";
1323 case PPCISD::Lo
: return "PPCISD::Lo";
1324 case PPCISD::TOC_ENTRY
: return "PPCISD::TOC_ENTRY";
1325 case PPCISD::ATOMIC_CMP_SWAP_8
: return "PPCISD::ATOMIC_CMP_SWAP_8";
1326 case PPCISD::ATOMIC_CMP_SWAP_16
: return "PPCISD::ATOMIC_CMP_SWAP_16";
1327 case PPCISD::DYNALLOC
: return "PPCISD::DYNALLOC";
1328 case PPCISD::DYNAREAOFFSET
: return "PPCISD::DYNAREAOFFSET";
1329 case PPCISD::GlobalBaseReg
: return "PPCISD::GlobalBaseReg";
1330 case PPCISD::SRL
: return "PPCISD::SRL";
1331 case PPCISD::SRA
: return "PPCISD::SRA";
1332 case PPCISD::SHL
: return "PPCISD::SHL";
1333 case PPCISD::SRA_ADDZE
: return "PPCISD::SRA_ADDZE";
1334 case PPCISD::CALL
: return "PPCISD::CALL";
1335 case PPCISD::CALL_NOP
: return "PPCISD::CALL_NOP";
1336 case PPCISD::MTCTR
: return "PPCISD::MTCTR";
1337 case PPCISD::BCTRL
: return "PPCISD::BCTRL";
1338 case PPCISD::BCTRL_LOAD_TOC
: return "PPCISD::BCTRL_LOAD_TOC";
1339 case PPCISD::RET_FLAG
: return "PPCISD::RET_FLAG";
1340 case PPCISD::READ_TIME_BASE
: return "PPCISD::READ_TIME_BASE";
1341 case PPCISD::EH_SJLJ_SETJMP
: return "PPCISD::EH_SJLJ_SETJMP";
1342 case PPCISD::EH_SJLJ_LONGJMP
: return "PPCISD::EH_SJLJ_LONGJMP";
1343 case PPCISD::MFOCRF
: return "PPCISD::MFOCRF";
1344 case PPCISD::MFVSR
: return "PPCISD::MFVSR";
1345 case PPCISD::MTVSRA
: return "PPCISD::MTVSRA";
1346 case PPCISD::MTVSRZ
: return "PPCISD::MTVSRZ";
1347 case PPCISD::SINT_VEC_TO_FP
: return "PPCISD::SINT_VEC_TO_FP";
1348 case PPCISD::UINT_VEC_TO_FP
: return "PPCISD::UINT_VEC_TO_FP";
1349 case PPCISD::ANDIo_1_EQ_BIT
: return "PPCISD::ANDIo_1_EQ_BIT";
1350 case PPCISD::ANDIo_1_GT_BIT
: return "PPCISD::ANDIo_1_GT_BIT";
1351 case PPCISD::VCMP
: return "PPCISD::VCMP";
1352 case PPCISD::VCMPo
: return "PPCISD::VCMPo";
1353 case PPCISD::LBRX
: return "PPCISD::LBRX";
1354 case PPCISD::STBRX
: return "PPCISD::STBRX";
1355 case PPCISD::LFIWAX
: return "PPCISD::LFIWAX";
1356 case PPCISD::LFIWZX
: return "PPCISD::LFIWZX";
1357 case PPCISD::LXSIZX
: return "PPCISD::LXSIZX";
1358 case PPCISD::STXSIX
: return "PPCISD::STXSIX";
1359 case PPCISD::VEXTS
: return "PPCISD::VEXTS";
1360 case PPCISD::SExtVElems
: return "PPCISD::SExtVElems";
1361 case PPCISD::LXVD2X
: return "PPCISD::LXVD2X";
1362 case PPCISD::STXVD2X
: return "PPCISD::STXVD2X";
1363 case PPCISD::LOAD_VEC_BE
: return "PPCISD::LOAD_VEC_BE";
1364 case PPCISD::STORE_VEC_BE
: return "PPCISD::STORE_VEC_BE";
1365 case PPCISD::ST_VSR_SCAL_INT
:
1366 return "PPCISD::ST_VSR_SCAL_INT";
1367 case PPCISD::COND_BRANCH
: return "PPCISD::COND_BRANCH";
1368 case PPCISD::BDNZ
: return "PPCISD::BDNZ";
1369 case PPCISD::BDZ
: return "PPCISD::BDZ";
1370 case PPCISD::MFFS
: return "PPCISD::MFFS";
1371 case PPCISD::FADDRTZ
: return "PPCISD::FADDRTZ";
1372 case PPCISD::TC_RETURN
: return "PPCISD::TC_RETURN";
1373 case PPCISD::CR6SET
: return "PPCISD::CR6SET";
1374 case PPCISD::CR6UNSET
: return "PPCISD::CR6UNSET";
1375 case PPCISD::PPC32_GOT
: return "PPCISD::PPC32_GOT";
1376 case PPCISD::PPC32_PICGOT
: return "PPCISD::PPC32_PICGOT";
1377 case PPCISD::ADDIS_GOT_TPREL_HA
: return "PPCISD::ADDIS_GOT_TPREL_HA";
1378 case PPCISD::LD_GOT_TPREL_L
: return "PPCISD::LD_GOT_TPREL_L";
1379 case PPCISD::ADD_TLS
: return "PPCISD::ADD_TLS";
1380 case PPCISD::ADDIS_TLSGD_HA
: return "PPCISD::ADDIS_TLSGD_HA";
1381 case PPCISD::ADDI_TLSGD_L
: return "PPCISD::ADDI_TLSGD_L";
1382 case PPCISD::GET_TLS_ADDR
: return "PPCISD::GET_TLS_ADDR";
1383 case PPCISD::ADDI_TLSGD_L_ADDR
: return "PPCISD::ADDI_TLSGD_L_ADDR";
1384 case PPCISD::ADDIS_TLSLD_HA
: return "PPCISD::ADDIS_TLSLD_HA";
1385 case PPCISD::ADDI_TLSLD_L
: return "PPCISD::ADDI_TLSLD_L";
1386 case PPCISD::GET_TLSLD_ADDR
: return "PPCISD::GET_TLSLD_ADDR";
1387 case PPCISD::ADDI_TLSLD_L_ADDR
: return "PPCISD::ADDI_TLSLD_L_ADDR";
1388 case PPCISD::ADDIS_DTPREL_HA
: return "PPCISD::ADDIS_DTPREL_HA";
1389 case PPCISD::ADDI_DTPREL_L
: return "PPCISD::ADDI_DTPREL_L";
1390 case PPCISD::VADD_SPLAT
: return "PPCISD::VADD_SPLAT";
1391 case PPCISD::SC
: return "PPCISD::SC";
1392 case PPCISD::CLRBHRB
: return "PPCISD::CLRBHRB";
1393 case PPCISD::MFBHRBE
: return "PPCISD::MFBHRBE";
1394 case PPCISD::RFEBB
: return "PPCISD::RFEBB";
1395 case PPCISD::XXSWAPD
: return "PPCISD::XXSWAPD";
1396 case PPCISD::SWAP_NO_CHAIN
: return "PPCISD::SWAP_NO_CHAIN";
1397 case PPCISD::VABSD
: return "PPCISD::VABSD";
1398 case PPCISD::QVFPERM
: return "PPCISD::QVFPERM";
1399 case PPCISD::QVGPCI
: return "PPCISD::QVGPCI";
1400 case PPCISD::QVALIGNI
: return "PPCISD::QVALIGNI";
1401 case PPCISD::QVESPLATI
: return "PPCISD::QVESPLATI";
1402 case PPCISD::QBFLT
: return "PPCISD::QBFLT";
1403 case PPCISD::QVLFSb
: return "PPCISD::QVLFSb";
1404 case PPCISD::BUILD_FP128
: return "PPCISD::BUILD_FP128";
1405 case PPCISD::BUILD_SPE64
: return "PPCISD::BUILD_SPE64";
1406 case PPCISD::EXTRACT_SPE
: return "PPCISD::EXTRACT_SPE";
1407 case PPCISD::EXTSWSLI
: return "PPCISD::EXTSWSLI";
1408 case PPCISD::LD_VSX_LH
: return "PPCISD::LD_VSX_LH";
1409 case PPCISD::FP_EXTEND_LH
: return "PPCISD::FP_EXTEND_LH";
/// getSetCCResultType - Pick the value type used for the result of a SETCC.
/// The visible result choices are: i1 when the subtarget uses CR bits and i32
/// otherwise; with QPX, a vector of i1 with the same element count as VT;
/// otherwise VT with its element type changed to an integer type.
1414 EVT
PPCTargetLowering::getSetCCResultType(const DataLayout
&DL
, LLVMContext
&C
,
// NOTE(review): presumably the non-vector case -- confirm against the
// guarding condition.
1417 return Subtarget
.useCRBits() ? MVT::i1
: MVT::i32
;
// QPX: the result is a vector of i1 with VT's number of elements.
1419 if (Subtarget
.hasQPX())
1420 return EVT::getVectorVT(C
, MVT::i1
, VT
.getVectorNumElements());
// Otherwise keep VT's shape and switch its element type to integer.
1422 return VT
.changeVectorElementTypeToInteger();
/// enableAggressiveFMAFusion - Query for aggressive FMA fusion on \p VT.
/// Callers must pass a floating-point type; the assert below enforces it.
1425 bool PPCTargetLowering::enableAggressiveFMAFusion(EVT VT
) const {
1426 assert(VT
.isFloatingPoint() && "Non-floating-point FMA?");
1430 //===----------------------------------------------------------------------===//
1431 // Node matching predicates, for use by the tblgen matching code.
1432 //===----------------------------------------------------------------------===//
1434 /// isFloatingPointZero - Return true if this is 0.0 or -0.0.
/// Two forms are recognised: a ConstantFPSDNode, and a load (extending or
/// not) whose operand 1 is a ConstantPoolSDNode wrapping a ConstantFP.
1435 static bool isFloatingPointZero(SDValue Op
) {
// Direct case: Op is itself a floating-point constant node.
1436 if (ConstantFPSDNode
*CFP
= dyn_cast
<ConstantFPSDNode
>(Op
))
1437 return CFP
->getValueAPF().isZero();
1438 else if (ISD::isEXTLoad(Op
.getNode()) || ISD::isNON_EXTLoad(Op
.getNode())) {
1439 // Maybe this has already been legalized into the constant pool?
// Operand 1 of the load (presumably its address operand -- confirm) is
// checked for a constant-pool entry holding a ConstantFP.
1440 if (ConstantPoolSDNode
*CP
= dyn_cast
<ConstantPoolSDNode
>(Op
.getOperand(1)))
1441 if (const ConstantFP
*CFP
= dyn_cast
<ConstantFP
>(CP
->getConstVal()))
1442 return CFP
->getValueAPF().isZero();
/// isConstantOrUndef - Test one shuffle-mask entry against an expected index.
/// A negative \p Op stands for an undef entry and always matches; any other
/// \p Op matches only when it equals \p Val.
static bool isConstantOrUndef(int Op, int Val) {
  if (Op < 0)
    return true;
  return Op == Val;
}
1453 /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
1454 /// VPKUHUM instruction.
1455 /// The ShuffleKind distinguishes between big-endian operations with
1456 /// two different inputs (0), either-endian operations with two identical
1457 /// inputs (1), and little-endian operations with two different inputs (2).
1458 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
///
/// Every mask entry is checked with isConstantOrUndef, so undef entries
/// match any expected index.
1459 bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1460 SelectionDAG
&DAG
) {
1461 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
1462 if (ShuffleKind
== 0) {
// Kind 0: result byte i must select element 2*i+1, i.e. the odd-numbered
// bytes of the 32-byte concatenation of both inputs.
1465 for (unsigned i
= 0; i
!= 16; ++i
)
1466 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+1))
1468 } else if (ShuffleKind
== 2) {
// Kind 2: result byte i must select element 2*i (the even-numbered bytes).
1471 for (unsigned i
= 0; i
!= 16; ++i
)
1472 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2))
1474 } else if (ShuffleKind
== 1) {
// Kind 1: both 8-byte halves of the mask must select bytes 2*i+j, where j is
// 0 on little-endian and 1 on big-endian.
1475 unsigned j
= IsLE
? 0 : 1;
1476 for (unsigned i
= 0; i
!= 8; ++i
)
1477 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1478 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
))
1484 /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
1485 /// VPKUWUM instruction.
1486 /// The ShuffleKind distinguishes between big-endian operations with
1487 /// two different inputs (0), either-endian operations with two identical
1488 /// inputs (1), and little-endian operations with two different inputs (2).
1489 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
///
/// The mask is examined two bytes at a time; every entry is checked with
/// isConstantOrUndef, so undef entries match any expected index.
1490 bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1491 SelectionDAG
&DAG
) {
1492 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
1493 if (ShuffleKind
== 0) {
// Kind 0: each result byte pair (i, i+1) must select elements 2*i+2 and
// 2*i+3, i.e. bytes 2 and 3 of every 4-byte group of the concatenation.
1496 for (unsigned i
= 0; i
!= 16; i
+= 2)
1497 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+2) ||
1498 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+3))
1500 } else if (ShuffleKind
== 2) {
// Kind 2: each result byte pair must select elements 2*i and 2*i+1, i.e.
// bytes 0 and 1 of every 4-byte group.
1503 for (unsigned i
= 0; i
!= 16; i
+= 2)
1504 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2) ||
1505 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+1))
1507 } else if (ShuffleKind
== 1) {
// Kind 1: both 8-byte halves of the mask must select the same byte pairs,
// 2*i+j and 2*i+j+1, where j is 0 on little-endian and 2 on big-endian.
1508 unsigned j
= IsLE
? 0 : 2;
1509 for (unsigned i
= 0; i
!= 8; i
+= 2)
1510 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1511 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+j
+1) ||
1512 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
) ||
1513 !isConstantOrUndef(N
->getMaskElt(i
+9), i
*2+j
+1))
1519 /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
1520 /// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
1521 /// current subtarget.
1523 /// The ShuffleKind distinguishes between big-endian operations with
1524 /// two different inputs (0), either-endian operations with two identical
1525 /// inputs (1), and little-endian operations with two different inputs (2).
1526 /// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
1527 bool PPC::isVPKUDUMShuffleMask(ShuffleVectorSDNode
*N
, unsigned ShuffleKind
,
1528 SelectionDAG
&DAG
) {
1529 const PPCSubtarget
& Subtarget
=
1530 static_cast<const PPCSubtarget
&>(DAG
.getSubtarget());
1531 if (!Subtarget
.hasP8Vector())
1534 bool IsLE
= DAG
.getDataLayout().isLittleEndian();
1535 if (ShuffleKind
== 0) {
1538 for (unsigned i
= 0; i
!= 16; i
+= 4)
1539 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+4) ||
1540 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+5) ||
1541 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+6) ||
1542 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+7))
1544 } else if (ShuffleKind
== 2) {
1547 for (unsigned i
= 0; i
!= 16; i
+= 4)
1548 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2) ||
1549 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+1) ||
1550 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+2) ||
1551 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+3))
1553 } else if (ShuffleKind
== 1) {
1554 unsigned j
= IsLE
? 0 : 4;
1555 for (unsigned i
= 0; i
!= 8; i
+= 4)
1556 if (!isConstantOrUndef(N
->getMaskElt(i
), i
*2+j
) ||
1557 !isConstantOrUndef(N
->getMaskElt(i
+1), i
*2+j
+1) ||
1558 !isConstantOrUndef(N
->getMaskElt(i
+2), i
*2+j
+2) ||
1559 !isConstantOrUndef(N
->getMaskElt(i
+3), i
*2+j
+3) ||
1560 !isConstantOrUndef(N
->getMaskElt(i
+8), i
*2+j
) ||
1561 !isConstantOrUndef(N
->getMaskElt(i
+9), i
*2+j
+1) ||
1562 !isConstantOrUndef(N
->getMaskElt(i
+10), i
*2+j
+2) ||
1563 !isConstantOrUndef(N
->getMaskElt(i
+11), i
*2+j
+3))
1569 /// isVMerge - Common function, used to match vmrg* shuffles.
1571 static bool isVMerge(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1572 unsigned LHSStart
, unsigned RHSStart
) {
1573 if (N
->getValueType(0) != MVT::v16i8
)
1575 assert((UnitSize
== 1 || UnitSize
== 2 || UnitSize
== 4) &&
1576 "Unsupported merge size!");
1578 for (unsigned i
= 0; i
!= 8/UnitSize
; ++i
) // Step over units
1579 for (unsigned j
= 0; j
!= UnitSize
; ++j
) { // Step over bytes within unit
1580 if (!isConstantOrUndef(N
->getMaskElt(i
*UnitSize
*2+j
),
1581 LHSStart
+j
+i
*UnitSize
) ||
1582 !isConstantOrUndef(N
->getMaskElt(i
*UnitSize
*2+UnitSize
+j
),
1583 RHSStart
+j
+i
*UnitSize
))
1589 /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
1590 /// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
1591 /// The ShuffleKind distinguishes between big-endian merges with two
1592 /// different inputs (0), either-endian merges with two identical inputs (1),
1593 /// and little-endian merges with two different inputs (2). For the latter,
1594 /// the input operands are swapped (see PPCInstrAltivec.td).
1595 bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1596 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1597 if (DAG
.getDataLayout().isLittleEndian()) {
1598 if (ShuffleKind
== 1) // unary
1599 return isVMerge(N
, UnitSize
, 0, 0);
1600 else if (ShuffleKind
== 2) // swapped
1601 return isVMerge(N
, UnitSize
, 0, 16);
1605 if (ShuffleKind
== 1) // unary
1606 return isVMerge(N
, UnitSize
, 8, 8);
1607 else if (ShuffleKind
== 0) // normal
1608 return isVMerge(N
, UnitSize
, 8, 24);
1614 /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
1615 /// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
1616 /// The ShuffleKind distinguishes between big-endian merges with two
1617 /// different inputs (0), either-endian merges with two identical inputs (1),
1618 /// and little-endian merges with two different inputs (2). For the latter,
1619 /// the input operands are swapped (see PPCInstrAltivec.td).
1620 bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode
*N
, unsigned UnitSize
,
1621 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1622 if (DAG
.getDataLayout().isLittleEndian()) {
1623 if (ShuffleKind
== 1) // unary
1624 return isVMerge(N
, UnitSize
, 8, 8);
1625 else if (ShuffleKind
== 2) // swapped
1626 return isVMerge(N
, UnitSize
, 8, 24);
1630 if (ShuffleKind
== 1) // unary
1631 return isVMerge(N
, UnitSize
, 0, 0);
1632 else if (ShuffleKind
== 0) // normal
1633 return isVMerge(N
, UnitSize
, 0, 16);
1640 * Common function used to match vmrgew and vmrgow shuffles
1642 * The indexOffset determines whether to look for even or odd words in
1643 * the shuffle mask. This is based on the of the endianness of the target
1646 * - Use offset of 0 to check for odd elements
1647 * - Use offset of 4 to check for even elements
1649 * - Use offset of 0 to check for even elements
1650 * - Use offset of 4 to check for odd elements
1651 * A detailed description of the vector element ordering for little endian and
1652 * big endian can be found at
1653 * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1654 * Targeting your applications - what little endian and big endian IBM XL C/C++
1655 * compiler differences mean to you
1657 * The mask to the shuffle vector instruction specifies the indices of the
1658 * elements from the two input vectors to place in the result. The elements are
1659 * numbered in array-access order, starting with the first vector. These vectors
1660 * are always of type v16i8, thus each vector will contain 16 elements of size
1661 * 8. More info on the shuffle vector can be found in the
1662 * http://llvm.org/docs/LangRef.html#shufflevector-instruction
1663 * Language Reference.
1665 * The RHSStartValue indicates whether the same input vectors are used (unary)
1666 * or two different input vectors are used, based on the following:
1667 * - If the instruction uses the same vector for both inputs, the range of the
1668 * indices will be 0 to 15. In this case, the RHSStart value passed should
1670 * - If the instruction has two different vectors then the range of the
1671 * indices will be 0 to 31. In this case, the RHSStart value passed should
1672 * be 16 (indices 0-15 specify elements in the first vector while indices 16
1673 * to 31 specify elements in the second vector).
1675 * \param[in] N The shuffle vector SD Node to analyze
1676 * \param[in] IndexOffset Specifies whether to look for even or odd elements
1677 * \param[in] RHSStartValue Specifies the starting index for the righthand input
1678 * vector to the shuffle_vector instruction
1679 * \return true iff this shuffle vector represents an even or odd word merge
1681 static bool isVMerge(ShuffleVectorSDNode
*N
, unsigned IndexOffset
,
1682 unsigned RHSStartValue
) {
1683 if (N
->getValueType(0) != MVT::v16i8
)
1686 for (unsigned i
= 0; i
< 2; ++i
)
1687 for (unsigned j
= 0; j
< 4; ++j
)
1688 if (!isConstantOrUndef(N
->getMaskElt(i
*4+j
),
1689 i
*RHSStartValue
+j
+IndexOffset
) ||
1690 !isConstantOrUndef(N
->getMaskElt(i
*4+j
+8),
1691 i
*RHSStartValue
+j
+IndexOffset
+8))
1697 * Determine if the specified shuffle mask is suitable for the vmrgew or
1698 * vmrgow instructions.
1700 * \param[in] N The shuffle vector SD Node to analyze
1701 * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
1702 * \param[in] ShuffleKind Identify the type of merge:
1703 * - 0 = big-endian merge with two different inputs;
1704 * - 1 = either-endian merge with two identical inputs;
1705 * - 2 = little-endian merge with two different inputs (inputs are swapped for
1706 * little-endian merges).
1707 * \param[in] DAG The current SelectionDAG
1708 * \return true iff this shuffle mask
1710 bool PPC::isVMRGEOShuffleMask(ShuffleVectorSDNode
*N
, bool CheckEven
,
1711 unsigned ShuffleKind
, SelectionDAG
&DAG
) {
1712 if (DAG
.getDataLayout().isLittleEndian()) {
1713 unsigned indexOffset
= CheckEven
? 4 : 0;
1714 if (ShuffleKind
== 1) // Unary
1715 return isVMerge(N
, indexOffset
, 0);
1716 else if (ShuffleKind
== 2) // swapped
1717 return isVMerge(N
, indexOffset
, 16);
1722 unsigned indexOffset
= CheckEven
? 0 : 4;
1723 if (ShuffleKind
== 1) // Unary
1724 return isVMerge(N
, indexOffset
, 0);
1725 else if (ShuffleKind
== 0) // Normal
1726 return isVMerge(N
, indexOffset
, 16);
1733 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
1734 /// amount, otherwise return -1.
1735 /// The ShuffleKind distinguishes between big-endian operations with two
1736 /// different inputs (0), either-endian operations with two identical inputs
1737 /// (1), and little-endian operations with two different inputs (2). For the
1738 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
1739 int PPC::isVSLDOIShuffleMask(SDNode
*N
, unsigned ShuffleKind
,
1740 SelectionDAG
&DAG
) {
1741 if (N
->getValueType(0) != MVT::v16i8
)
1744 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
1746 // Find the first non-undef value in the shuffle mask.
1748 for (i
= 0; i
!= 16 && SVOp
->getMaskElt(i
) < 0; ++i
)
1751 if (i
== 16) return -1; // all undef.
1753 // Otherwise, check to see if the rest of the elements are consecutively
1754 // numbered from this value.
1755 unsigned ShiftAmt
= SVOp
->getMaskElt(i
);
1756 if (ShiftAmt
< i
) return -1;
1759 bool isLE
= DAG
.getDataLayout().isLittleEndian();
1761 if ((ShuffleKind
== 0 && !isLE
) || (ShuffleKind
== 2 && isLE
)) {
1762 // Check the rest of the elements to see if they are consecutive.
1763 for (++i
; i
!= 16; ++i
)
1764 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), ShiftAmt
+i
))
1766 } else if (ShuffleKind
== 1) {
1767 // Check the rest of the elements to see if they are consecutive.
1768 for (++i
; i
!= 16; ++i
)
1769 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), (ShiftAmt
+i
) & 15))
1775 ShiftAmt
= 16 - ShiftAmt
;
1780 /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
1781 /// specifies a splat of a single element that is suitable for input to
1782 /// VSPLTB/VSPLTH/VSPLTW.
1783 bool PPC::isSplatShuffleMask(ShuffleVectorSDNode
*N
, unsigned EltSize
) {
1784 assert(N
->getValueType(0) == MVT::v16i8
&&
1785 (EltSize
== 1 || EltSize
== 2 || EltSize
== 4));
1787 // The consecutive indices need to specify an element, not part of two
1788 // different elements. So abandon ship early if this isn't the case.
1789 if (N
->getMaskElt(0) % EltSize
!= 0)
1792 // This is a splat operation if each element of the permute is the same, and
1793 // if the value doesn't reference the second vector.
1794 unsigned ElementBase
= N
->getMaskElt(0);
1796 // FIXME: Handle UNDEF elements too!
1797 if (ElementBase
>= 16)
1800 // Check that the indices are consecutive, in the case of a multi-byte element
1801 // splatted with a v16i8 mask.
1802 for (unsigned i
= 1; i
!= EltSize
; ++i
)
1803 if (N
->getMaskElt(i
) < 0 || N
->getMaskElt(i
) != (int)(i
+ElementBase
))
1806 for (unsigned i
= EltSize
, e
= 16; i
!= e
; i
+= EltSize
) {
1807 if (N
->getMaskElt(i
) < 0) continue;
1808 for (unsigned j
= 0; j
!= EltSize
; ++j
)
1809 if (N
->getMaskElt(i
+j
) != N
->getMaskElt(j
))
1815 /// Check that the mask is shuffling N byte elements. Within each N byte
1816 /// element of the mask, the indices could be either in increasing or
1817 /// decreasing order as long as they are consecutive.
1818 /// \param[in] N the shuffle vector SD Node to analyze
1819 /// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
1820 /// Word/DoubleWord/QuadWord).
1821 /// \param[in] StepLen the delta indices number among the N byte element, if
1822 /// the mask is in increasing/decreasing order then it is 1/-1.
1823 /// \return true iff the mask is shuffling N byte elements.
1824 static bool isNByteElemShuffleMask(ShuffleVectorSDNode
*N
, unsigned Width
,
1826 assert((Width
== 2 || Width
== 4 || Width
== 8 || Width
== 16) &&
1827 "Unexpected element width.");
1828 assert((StepLen
== 1 || StepLen
== -1) && "Unexpected element width.");
1830 unsigned NumOfElem
= 16 / Width
;
1831 unsigned MaskVal
[16]; // Width is never greater than 16
1832 for (unsigned i
= 0; i
< NumOfElem
; ++i
) {
1833 MaskVal
[0] = N
->getMaskElt(i
* Width
);
1834 if ((StepLen
== 1) && (MaskVal
[0] % Width
)) {
1836 } else if ((StepLen
== -1) && ((MaskVal
[0] + 1) % Width
)) {
1840 for (unsigned int j
= 1; j
< Width
; ++j
) {
1841 MaskVal
[j
] = N
->getMaskElt(i
* Width
+ j
);
1842 if (MaskVal
[j
] != MaskVal
[j
-1] + StepLen
) {
1851 bool PPC::isXXINSERTWMask(ShuffleVectorSDNode
*N
, unsigned &ShiftElts
,
1852 unsigned &InsertAtByte
, bool &Swap
, bool IsLE
) {
1853 if (!isNByteElemShuffleMask(N
, 4, 1))
1856 // Now we look at mask elements 0,4,8,12
1857 unsigned M0
= N
->getMaskElt(0) / 4;
1858 unsigned M1
= N
->getMaskElt(4) / 4;
1859 unsigned M2
= N
->getMaskElt(8) / 4;
1860 unsigned M3
= N
->getMaskElt(12) / 4;
1861 unsigned LittleEndianShifts
[] = { 2, 1, 0, 3 };
1862 unsigned BigEndianShifts
[] = { 3, 0, 1, 2 };
1864 // Below, let H and L be arbitrary elements of the shuffle mask
1865 // where H is in the range [4,7] and L is in the range [0,3].
1866 // H, 1, 2, 3 or L, 5, 6, 7
1867 if ((M0
> 3 && M1
== 1 && M2
== 2 && M3
== 3) ||
1868 (M0
< 4 && M1
== 5 && M2
== 6 && M3
== 7)) {
1869 ShiftElts
= IsLE
? LittleEndianShifts
[M0
& 0x3] : BigEndianShifts
[M0
& 0x3];
1870 InsertAtByte
= IsLE
? 12 : 0;
1874 // 0, H, 2, 3 or 4, L, 6, 7
1875 if ((M1
> 3 && M0
== 0 && M2
== 2 && M3
== 3) ||
1876 (M1
< 4 && M0
== 4 && M2
== 6 && M3
== 7)) {
1877 ShiftElts
= IsLE
? LittleEndianShifts
[M1
& 0x3] : BigEndianShifts
[M1
& 0x3];
1878 InsertAtByte
= IsLE
? 8 : 4;
1882 // 0, 1, H, 3 or 4, 5, L, 7
1883 if ((M2
> 3 && M0
== 0 && M1
== 1 && M3
== 3) ||
1884 (M2
< 4 && M0
== 4 && M1
== 5 && M3
== 7)) {
1885 ShiftElts
= IsLE
? LittleEndianShifts
[M2
& 0x3] : BigEndianShifts
[M2
& 0x3];
1886 InsertAtByte
= IsLE
? 4 : 8;
1890 // 0, 1, 2, H or 4, 5, 6, L
1891 if ((M3
> 3 && M0
== 0 && M1
== 1 && M2
== 2) ||
1892 (M3
< 4 && M0
== 4 && M1
== 5 && M2
== 6)) {
1893 ShiftElts
= IsLE
? LittleEndianShifts
[M3
& 0x3] : BigEndianShifts
[M3
& 0x3];
1894 InsertAtByte
= IsLE
? 0 : 12;
1899 // If both vector operands for the shuffle are the same vector, the mask will
1900 // contain only elements from the first one and the second one will be undef.
1901 if (N
->getOperand(1).isUndef()) {
1904 unsigned XXINSERTWSrcElem
= IsLE
? 2 : 1;
1905 if (M0
== XXINSERTWSrcElem
&& M1
== 1 && M2
== 2 && M3
== 3) {
1906 InsertAtByte
= IsLE
? 12 : 0;
1909 if (M0
== 0 && M1
== XXINSERTWSrcElem
&& M2
== 2 && M3
== 3) {
1910 InsertAtByte
= IsLE
? 8 : 4;
1913 if (M0
== 0 && M1
== 1 && M2
== XXINSERTWSrcElem
&& M3
== 3) {
1914 InsertAtByte
= IsLE
? 4 : 8;
1917 if (M0
== 0 && M1
== 1 && M2
== 2 && M3
== XXINSERTWSrcElem
) {
1918 InsertAtByte
= IsLE
? 0 : 12;
1926 bool PPC::isXXSLDWIShuffleMask(ShuffleVectorSDNode
*N
, unsigned &ShiftElts
,
1927 bool &Swap
, bool IsLE
) {
1928 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
1929 // Ensure each byte index of the word is consecutive.
1930 if (!isNByteElemShuffleMask(N
, 4, 1))
1933 // Now we look at mask elements 0,4,8,12, which are the beginning of words.
1934 unsigned M0
= N
->getMaskElt(0) / 4;
1935 unsigned M1
= N
->getMaskElt(4) / 4;
1936 unsigned M2
= N
->getMaskElt(8) / 4;
1937 unsigned M3
= N
->getMaskElt(12) / 4;
1939 // If both vector operands for the shuffle are the same vector, the mask will
1940 // contain only elements from the first one and the second one will be undef.
1941 if (N
->getOperand(1).isUndef()) {
1942 assert(M0
< 4 && "Indexing into an undef vector?");
1943 if (M1
!= (M0
+ 1) % 4 || M2
!= (M1
+ 1) % 4 || M3
!= (M2
+ 1) % 4)
1946 ShiftElts
= IsLE
? (4 - M0
) % 4 : M0
;
1951 // Ensure each word index of the ShuffleVector Mask is consecutive.
1952 if (M1
!= (M0
+ 1) % 8 || M2
!= (M1
+ 1) % 8 || M3
!= (M2
+ 1) % 8)
1956 if (M0
== 0 || M0
== 7 || M0
== 6 || M0
== 5) {
1957 // Input vectors don't need to be swapped if the leading element
1958 // of the result is one of the 3 left elements of the second vector
1959 // (or if there is no shift to be done at all).
1961 ShiftElts
= (8 - M0
) % 8;
1962 } else if (M0
== 4 || M0
== 3 || M0
== 2 || M0
== 1) {
1963 // Input vectors need to be swapped if the leading element
1964 // of the result is one of the 3 left elements of the first vector
1965 // (or if we're shifting by 4 - thereby simply swapping the vectors).
1967 ShiftElts
= (4 - M0
) % 4;
1972 if (M0
== 0 || M0
== 1 || M0
== 2 || M0
== 3) {
1973 // Input vectors don't need to be swapped if the leading element
1974 // of the result is one of the 4 elements of the first vector.
1977 } else if (M0
== 4 || M0
== 5 || M0
== 6 || M0
== 7) {
1978 // Input vectors need to be swapped if the leading element
1979 // of the result is one of the 4 elements of the right vector.
1988 bool static isXXBRShuffleMaskHelper(ShuffleVectorSDNode
*N
, int Width
) {
1989 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
1991 if (!isNByteElemShuffleMask(N
, Width
, -1))
1994 for (int i
= 0; i
< 16; i
+= Width
)
1995 if (N
->getMaskElt(i
) != i
+ Width
- 1)
2001 bool PPC::isXXBRHShuffleMask(ShuffleVectorSDNode
*N
) {
2002 return isXXBRShuffleMaskHelper(N
, 2);
2005 bool PPC::isXXBRWShuffleMask(ShuffleVectorSDNode
*N
) {
2006 return isXXBRShuffleMaskHelper(N
, 4);
2009 bool PPC::isXXBRDShuffleMask(ShuffleVectorSDNode
*N
) {
2010 return isXXBRShuffleMaskHelper(N
, 8);
2013 bool PPC::isXXBRQShuffleMask(ShuffleVectorSDNode
*N
) {
2014 return isXXBRShuffleMaskHelper(N
, 16);
2017 /// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
2018 /// if the inputs to the instruction should be swapped and set \p DM to the
2019 /// value for the immediate.
2020 /// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
2021 /// AND element 0 of the result comes from the first input (LE) or second input
2022 /// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
2023 /// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
2025 bool PPC::isXXPERMDIShuffleMask(ShuffleVectorSDNode
*N
, unsigned &DM
,
2026 bool &Swap
, bool IsLE
) {
2027 assert(N
->getValueType(0) == MVT::v16i8
&& "Shuffle vector expects v16i8");
2029 // Ensure each byte index of the double word is consecutive.
2030 if (!isNByteElemShuffleMask(N
, 8, 1))
2033 unsigned M0
= N
->getMaskElt(0) / 8;
2034 unsigned M1
= N
->getMaskElt(8) / 8;
2035 assert(((M0
| M1
) < 4) && "A mask element out of bounds?");
2037 // If both vector operands for the shuffle are the same vector, the mask will
2038 // contain only elements from the first one and the second one will be undef.
2039 if (N
->getOperand(1).isUndef()) {
2040 if ((M0
| M1
) < 2) {
2041 DM
= IsLE
? (((~M1
) & 1) << 1) + ((~M0
) & 1) : (M0
<< 1) + (M1
& 1);
2049 if (M0
> 1 && M1
< 2) {
2051 } else if (M0
< 2 && M1
> 1) {
2058 // Note: if control flow comes here that means Swap is already set above
2059 DM
= (((~M1
) & 1) << 1) + ((~M0
) & 1);
2062 if (M0
< 2 && M1
> 1) {
2064 } else if (M0
> 1 && M1
< 2) {
2071 // Note: if control flow comes here that means Swap is already set above
2072 DM
= (M0
<< 1) + (M1
& 1);
2078 /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
2079 /// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
2080 unsigned PPC::getVSPLTImmediate(SDNode
*N
, unsigned EltSize
,
2081 SelectionDAG
&DAG
) {
2082 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
2083 assert(isSplatShuffleMask(SVOp
, EltSize
));
2084 if (DAG
.getDataLayout().isLittleEndian())
2085 return (16 / EltSize
) - 1 - (SVOp
->getMaskElt(0) / EltSize
);
2087 return SVOp
->getMaskElt(0) / EltSize
;
2090 /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
2091 /// by using a vspltis[bhw] instruction of the specified element size, return
2092 /// the constant being splatted. The ByteSize field indicates the number of
2093 /// bytes of each element [124] -> [bhw].
2094 SDValue
PPC::get_VSPLTI_elt(SDNode
*N
, unsigned ByteSize
, SelectionDAG
&DAG
) {
2095 SDValue
OpVal(nullptr, 0);
2097 // If ByteSize of the splat is bigger than the element size of the
2098 // build_vector, then we have a case where we are checking for a splat where
2099 // multiple elements of the buildvector are folded together into a single
2100 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
2101 unsigned EltSize
= 16/N
->getNumOperands();
2102 if (EltSize
< ByteSize
) {
2103 unsigned Multiple
= ByteSize
/EltSize
; // Number of BV entries per spltval.
2104 SDValue UniquedVals
[4];
2105 assert(Multiple
> 1 && Multiple
<= 4 && "How can this happen?");
2107 // See if all of the elements in the buildvector agree across.
2108 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
2109 if (N
->getOperand(i
).isUndef()) continue;
2110 // If the element isn't a constant, bail fully out.
2111 if (!isa
<ConstantSDNode
>(N
->getOperand(i
))) return SDValue();
2113 if (!UniquedVals
[i
&(Multiple
-1)].getNode())
2114 UniquedVals
[i
&(Multiple
-1)] = N
->getOperand(i
);
2115 else if (UniquedVals
[i
&(Multiple
-1)] != N
->getOperand(i
))
2116 return SDValue(); // no match.
2119 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
2120 // either constant or undef values that are identical for each chunk. See
2121 // if these chunks can form into a larger vspltis*.
2123 // Check to see if all of the leading entries are either 0 or -1. If
2124 // neither, then this won't fit into the immediate field.
2125 bool LeadingZero
= true;
2126 bool LeadingOnes
= true;
2127 for (unsigned i
= 0; i
!= Multiple
-1; ++i
) {
2128 if (!UniquedVals
[i
].getNode()) continue; // Must have been undefs.
2130 LeadingZero
&= isNullConstant(UniquedVals
[i
]);
2131 LeadingOnes
&= isAllOnesConstant(UniquedVals
[i
]);
2133 // Finally, check the least significant entry.
2135 if (!UniquedVals
[Multiple
-1].getNode())
2136 return DAG
.getTargetConstant(0, SDLoc(N
), MVT::i32
); // 0,0,0,undef
2137 int Val
= cast
<ConstantSDNode
>(UniquedVals
[Multiple
-1])->getZExtValue();
2138 if (Val
< 16) // 0,0,0,4 -> vspltisw(4)
2139 return DAG
.getTargetConstant(Val
, SDLoc(N
), MVT::i32
);
2142 if (!UniquedVals
[Multiple
-1].getNode())
2143 return DAG
.getTargetConstant(~0U, SDLoc(N
), MVT::i32
); // -1,-1,-1,undef
2144 int Val
=cast
<ConstantSDNode
>(UniquedVals
[Multiple
-1])->getSExtValue();
2145 if (Val
>= -16) // -1,-1,-1,-2 -> vspltisw(-2)
2146 return DAG
.getTargetConstant(Val
, SDLoc(N
), MVT::i32
);
2152 // Check to see if this buildvec has a single non-undef value in its elements.
2153 for (unsigned i
= 0, e
= N
->getNumOperands(); i
!= e
; ++i
) {
2154 if (N
->getOperand(i
).isUndef()) continue;
2155 if (!OpVal
.getNode())
2156 OpVal
= N
->getOperand(i
);
2157 else if (OpVal
!= N
->getOperand(i
))
2161 if (!OpVal
.getNode()) return SDValue(); // All UNDEF: use implicit def.
2163 unsigned ValSizeInBytes
= EltSize
;
2165 if (ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(OpVal
)) {
2166 Value
= CN
->getZExtValue();
2167 } else if (ConstantFPSDNode
*CN
= dyn_cast
<ConstantFPSDNode
>(OpVal
)) {
2168 assert(CN
->getValueType(0) == MVT::f32
&& "Only one legal FP vector type!");
2169 Value
= FloatToBits(CN
->getValueAPF().convertToFloat());
2172 // If the splat value is larger than the element value, then we can never do
2173 // this splat. The only case that we could fit the replicated bits into our
2174 // immediate field for would be zero, and we prefer to use vxor for it.
2175 if (ValSizeInBytes
< ByteSize
) return SDValue();
2177 // If the element value is larger than the splat value, check if it consists
2178 // of a repeated bit pattern of size ByteSize.
2179 if (!APInt(ValSizeInBytes
* 8, Value
).isSplat(ByteSize
* 8))
2182 // Properly sign extend the value.
2183 int MaskVal
= SignExtend32(Value
, ByteSize
* 8);
2185 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
2186 if (MaskVal
== 0) return SDValue();
2188 // Finally, if this value fits in a 5 bit sext field, return it
2189 if (SignExtend32
<5>(MaskVal
) == MaskVal
)
2190 return DAG
.getTargetConstant(MaskVal
, SDLoc(N
), MVT::i32
);
2194 /// isQVALIGNIShuffleMask - If this is a qvaligni shuffle mask, return the shift
2195 /// amount, otherwise return -1.
2196 int PPC::isQVALIGNIShuffleMask(SDNode
*N
) {
2197 EVT VT
= N
->getValueType(0);
2198 if (VT
!= MVT::v4f64
&& VT
!= MVT::v4f32
&& VT
!= MVT::v4i1
)
2201 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(N
);
2203 // Find the first non-undef value in the shuffle mask.
2205 for (i
= 0; i
!= 4 && SVOp
->getMaskElt(i
) < 0; ++i
)
2208 if (i
== 4) return -1; // all undef.
2210 // Otherwise, check to see if the rest of the elements are consecutively
2211 // numbered from this value.
2212 unsigned ShiftAmt
= SVOp
->getMaskElt(i
);
2213 if (ShiftAmt
< i
) return -1;
2216 // Check the rest of the elements to see if they are consecutive.
2217 for (++i
; i
!= 4; ++i
)
2218 if (!isConstantOrUndef(SVOp
->getMaskElt(i
), ShiftAmt
+i
))
2224 //===----------------------------------------------------------------------===//
2225 // Addressing Mode Selection
2226 //===----------------------------------------------------------------------===//
2228 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2229 /// or 64-bit immediate, and if the value can be accurately represented as a
2230 /// sign extension from a 16-bit value. If so, this returns true and the
2232 bool llvm::isIntS16Immediate(SDNode
*N
, int16_t &Imm
) {
2233 if (!isa
<ConstantSDNode
>(N
))
2236 Imm
= (int16_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2237 if (N
->getValueType(0) == MVT::i32
)
2238 return Imm
== (int32_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2240 return Imm
== (int64_t)cast
<ConstantSDNode
>(N
)->getZExtValue();
2242 bool llvm::isIntS16Immediate(SDValue Op
, int16_t &Imm
) {
2243 return isIntS16Immediate(Op
.getNode(), Imm
);
2247 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2248 /// be represented as an indexed [r+r] operation.
2249 bool PPCTargetLowering::SelectAddressEVXRegReg(SDValue N
, SDValue
&Base
,
2251 SelectionDAG
&DAG
) const {
2252 for (SDNode::use_iterator UI
= N
->use_begin(), E
= N
->use_end();
2254 if (MemSDNode
*Memop
= dyn_cast
<MemSDNode
>(*UI
)) {
2255 if (Memop
->getMemoryVT() == MVT::f64
) {
2256 Base
= N
.getOperand(0);
2257 Index
= N
.getOperand(1);
2265 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2266 /// can be represented as an indexed [r+r] operation. Returns false if it
2267 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2268 /// non-zero and N can be represented by a base register plus a signed 16-bit
2269 /// displacement, make a more precise judgement by checking (displacement % \p
2270 /// EncodingAlignment).
2271 bool PPCTargetLowering::SelectAddressRegReg(SDValue N
, SDValue
&Base
,
2272 SDValue
&Index
, SelectionDAG
&DAG
,
2273 unsigned EncodingAlignment
) const {
2275 if (N
.getOpcode() == ISD::ADD
) {
2276 // Is there any SPE load/store (f64), which can't handle 16bit offset?
2277 // SPE load/store can only handle 8-bit offsets.
2278 if (hasSPE() && SelectAddressEVXRegReg(N
, Base
, Index
, DAG
))
2280 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2281 (!EncodingAlignment
|| !(imm
% EncodingAlignment
)))
2282 return false; // r+i
2283 if (N
.getOperand(1).getOpcode() == PPCISD::Lo
)
2284 return false; // r+i
2286 Base
= N
.getOperand(0);
2287 Index
= N
.getOperand(1);
2289 } else if (N
.getOpcode() == ISD::OR
) {
2290 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2291 (!EncodingAlignment
|| !(imm
% EncodingAlignment
)))
2292 return false; // r+i can fold it if we can.
2294 // If this is an or of disjoint bitfields, we can codegen this as an add
2295 // (for better address arithmetic) if the LHS and RHS of the OR are provably
2297 KnownBits LHSKnown
= DAG
.computeKnownBits(N
.getOperand(0));
2299 if (LHSKnown
.Zero
.getBoolValue()) {
2300 KnownBits RHSKnown
= DAG
.computeKnownBits(N
.getOperand(1));
2301 // If all of the bits are known zero on the LHS or RHS, the add won't
2303 if (~(LHSKnown
.Zero
| RHSKnown
.Zero
) == 0) {
2304 Base
= N
.getOperand(0);
2305 Index
= N
.getOperand(1);
2314 // If we happen to be doing an i64 load or store into a stack slot that has
2315 // less than a 4-byte alignment, then the frame-index elimination may need to
2316 // use an indexed load or store instruction (because the offset may not be a
2317 // multiple of 4). The extra register needed to hold the offset comes from the
2318 // register scavenger, and it is possible that the scavenger will need to use
2319 // an emergency spill slot. As a result, we need to make sure that a spill slot
2320 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2322 static void fixupFuncForFI(SelectionDAG
&DAG
, int FrameIdx
, EVT VT
) {
2323 // FIXME: This does not handle the LWA case.
2327 // NOTE: We'll exclude negative FIs here, which come from argument
2328 // lowering, because there are no known test cases triggering this problem
2329 // using packed structures (or similar). We can remove this exclusion if
2330 // we find such a test case. The reason why this is so test-case driven is
2331 // because this entire 'fixup' is only to prevent crashes (from the
2332 // register scavenger) on not-really-valid inputs. For example, if we have:
2334 // %b = bitcast i1* %a to i64*
2335 // store i64* a, i64 b
2336 // then the store should really be marked as 'align 1', but is not. If it
2337 // were marked as 'align 1' then the indexed form would have been
2338 // instruction-selected initially, and the problem this 'fixup' is preventing
2339 // won't happen regardless.
2343 MachineFunction
&MF
= DAG
.getMachineFunction();
2344 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
2346 unsigned Align
= MFI
.getObjectAlignment(FrameIdx
);
2350 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
2351 FuncInfo
->setHasNonRISpills();
2354 /// Returns true if the address N can be represented by a base register plus
2355 /// a signed 16-bit displacement [r+imm], and if it is not better
2356 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2357 /// displacements that are multiples of that value.
2358 bool PPCTargetLowering::SelectAddressRegImm(SDValue N
, SDValue
&Disp
,
2361 unsigned EncodingAlignment
) const {
// Overview of the address shapes examined below, in order:
//   1. ISD::ADD whose second operand is a signed 16-bit immediate, or a
//      PPCISD::Lo node;
//   2. ISD::OR with a signed 16-bit immediate whose set bits are known to be
//      zero in the first operand;
//   3. a bare ConstantSDNode (absolute address);
//   4. anything else, which ends in the [r+0] form at the bottom.
2362 // FIXME dl should come from parent load or store, not from address
2364 // If this can be more profitably realized as r+r, fail.
2365 if (SelectAddressRegReg(N
, Disp
, Base
, DAG
, EncodingAlignment
))
// Shape 1: N is an ADD.
2368 if (N
.getOpcode() == ISD::ADD
) {
// The immediate must pass isIntS16Immediate and, when EncodingAlignment is
// non-zero, be an exact multiple of it.
2370 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2371 (!EncodingAlignment
|| (imm
% EncodingAlignment
) == 0)) {
2372 Disp
= DAG
.getTargetConstant(imm
, dl
, N
.getValueType());
// A frame-index base is turned into a target frame index and reported to
// fixupFuncForFI together with the address value type.
2373 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(N
.getOperand(0))) {
2374 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2375 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2377 Base
= N
.getOperand(0);
2379 return true; // [r+i]
2380 } else if (N
.getOperand(1).getOpcode() == PPCISD::Lo
) {
2381 // Match LOAD (ADD (X, Lo(G))).
2382 assert(!cast
<ConstantSDNode
>(N
.getOperand(1).getOperand(1))->getZExtValue()
2383 && "Cannot handle constant offsets yet!");
2384 Disp
= N
.getOperand(1).getOperand(0); // The global address.
// The displacement must be one of the target address node kinds listed here.
2385 assert(Disp
.getOpcode() == ISD::TargetGlobalAddress
||
2386 Disp
.getOpcode() == ISD::TargetGlobalTLSAddress
||
2387 Disp
.getOpcode() == ISD::TargetConstantPool
||
2388 Disp
.getOpcode() == ISD::TargetJumpTable
);
2389 Base
= N
.getOperand(0);
2390 return true; // [&g+r]
// Shape 2: N is an OR with a suitable immediate.
2392 } else if (N
.getOpcode() == ISD::OR
) {
2394 if (isIntS16Immediate(N
.getOperand(1), imm
) &&
2395 (!EncodingAlignment
|| (imm
% EncodingAlignment
) == 0)) {
2396 // If this is an or of disjoint bitfields, we can codegen this as an add
2397 // (for better address arithmetic) if the LHS and RHS of the OR are
2398 // provably disjoint.
2399 KnownBits LHSKnown
= DAG
.computeKnownBits(N
.getOperand(0));
// The test below holds exactly when every bit set in imm is a known-zero bit
// of the first operand.
2401 if ((LHSKnown
.Zero
.getZExtValue()|~(uint64_t)imm
) == ~0ULL) {
2402 // If all of the bits are known zero on the LHS or RHS, the add won't
2404 if (FrameIndexSDNode
*FI
=
2405 dyn_cast
<FrameIndexSDNode
>(N
.getOperand(0))) {
2406 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2407 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2409 Base
= N
.getOperand(0);
2411 Disp
= DAG
.getTargetConstant(imm
, dl
, N
.getValueType());
// Shape 3: N is itself a constant.
2415 } else if (ConstantSDNode
*CN
= dyn_cast
<ConstantSDNode
>(N
)) {
2416 // Loading from a constant address.
2418 // If this address fits entirely in a 16-bit sext immediate field, codegen
2421 if (isIntS16Immediate(CN
, Imm
) &&
2422 (!EncodingAlignment
|| (Imm
% EncodingAlignment
) == 0)) {
2423 Disp
= DAG
.getTargetConstant(Imm
, dl
, CN
->getValueType(0));
// The base is the ZERO8 register on PPC64 and ZERO otherwise.
2424 Base
= DAG
.getRegister(Subtarget
.isPPC64() ? PPC::ZERO8
: PPC::ZERO
,
2425 CN
->getValueType(0));
2429 // Handle 32-bit sext immediates with LIS + addr mode.
// Accepted when the constant is i32, or when its value is unchanged by a
// round trip through int; the alignment restriction applies to the whole
// constant here.
2430 if ((CN
->getValueType(0) == MVT::i32
||
2431 (int64_t)CN
->getZExtValue() == (int)CN
->getZExtValue()) &&
2432 (!EncodingAlignment
|| (CN
->getZExtValue() % EncodingAlignment
) == 0)) {
2433 int Addr
= (int)CN
->getZExtValue();
2435 // Otherwise, break this down into an LIS + disp.
// Disp is the low 16 bits taken as a signed value. Subtracting that signed low
// half before the shift makes (Base << 16) + Disp equal Addr again.
2436 Disp
= DAG
.getTargetConstant((short)Addr
, dl
, MVT::i32
);
2438 Base
= DAG
.getTargetConstant((Addr
- (signed short)Addr
) >> 16, dl
,
// The high part is wrapped in an LIS machine node (LIS8 when the constant is
// not i32).
2440 unsigned Opc
= CN
->getValueType(0) == MVT::i32
? PPC::LIS
: PPC::LIS8
;
2441 Base
= SDValue(DAG
.getMachineNode(Opc
, dl
, CN
->getValueType(0), Base
), 0);
// Shape 4: zero displacement of pointer type; a frame-index N is rewritten to
// a target frame index as in the cases above.
2446 Disp
= DAG
.getTargetConstant(0, dl
, getPointerTy(DAG
.getDataLayout()));
2447 if (FrameIndexSDNode
*FI
= dyn_cast
<FrameIndexSDNode
>(N
)) {
2448 Base
= DAG
.getTargetFrameIndex(FI
->getIndex(), N
.getValueType());
2449 fixupFuncForFI(DAG
, FI
->getIndex(), N
.getValueType());
2452 return true; // [r+0]
2455 /// SelectAddressRegRegOnly - Given the specified address, force it to be
2456 /// represented as an indexed [r+r] operation.
2457 bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N
, SDValue
&Base
,
2459 SelectionDAG
&DAG
) const {
2460 // Check to see if we can easily represent this as an [r+r] address.  This
2461 // will fail if it thinks that the address is more profitably represented as
2462 // reg+imm, e.g. where imm = 0.
2463 if (SelectAddressRegReg(N
, Base
, Index
, DAG
))
2466 // If the address is the result of an add, we will utilize the fact that the
2467 // address calculation includes an implicit add.  However, we can reduce
2468 // register pressure if we do not materialize a constant just for use as the
2469 // index register.  We only get rid of the add if it is not an add of a
2470 // value and a 16-bit signed constant and both have a single use.
// Concretely: the ADD is split into its two operands unless operand 1 is a
// signed 16-bit immediate AND both operands have exactly one use.
2472 if (N
.getOpcode() == ISD::ADD
&&
2473 (!isIntS16Immediate(N
.getOperand(1), imm
) ||
2474 !N
.getOperand(1).hasOneUse() || !N
.getOperand(0).hasOneUse())) {
2475 Base
= N
.getOperand(0);
2476 Index
= N
.getOperand(1);
2480 // Otherwise, do it the hard way, using R0 as the base register.
// The register chosen is ZERO8 on PPC64 and ZERO otherwise.
2481 Base
= DAG
.getRegister(Subtarget
.isPPC64() ? PPC::ZERO8
: PPC::ZERO
,
2487 /// Returns true if we should use a direct load into vector instruction
2488 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.
2489 static bool usePartialVectorLoads(SDNode
*N
, const PPCSubtarget
& ST
) {
2491 // If there are any other uses other than scalar to vector, then we should
2492 // keep it as a scalar load -> direct move pattern to prevent multiple
// Only LoadSDNode inputs are examined; the dyn_cast yields null for anything
// else.
2494 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
);
2498 EVT MemVT
= LD
->getMemoryVT();
// The switch below needs a simple machine value type.
2499 if (!MemVT
.isSimple())
// Per-type subtarget requirements: the visible cases consult hasP8Vector() and
// hasP9Vector(). The case labels themselves are on lines not shown in this
// excerpt.
2501 switch(MemVT
.getSimpleVT().SimpleTy
) {
2505 if (!ST
.hasP8Vector())
2510 if (!ST
.hasP9Vector())
// Result number 0 of the node is the loaded value; it must have a single use.
2517 SDValue
LoadedVal(N
, 0);
2518 if (!LoadedVal
.hasOneUse())
// Walk every use of the load and look at uses of result 0 whose user is not a
// SCALAR_TO_VECTOR node.
2521 for (SDNode::use_iterator UI
= LD
->use_begin(), UE
= LD
->use_end();
2523 if (UI
.getUse().get().getResNo() == 0 &&
2524 UI
->getOpcode() != ISD::SCALAR_TO_VECTOR
)
2530 /// getPreIndexedAddressParts - returns true by value, base pointer and
2531 /// offset pointer and addressing mode by reference if the node's address
2532 /// can be legally represented as pre-indexed load / store address.
2533 bool PPCTargetLowering::getPreIndexedAddressParts(SDNode
*N
, SDValue
&Base
,
2535 ISD::MemIndexedMode
&AM
,
2536 SelectionDAG
&DAG
) const {
// DisablePPCPreinc switches this transformation off entirely.
2537 if (DisablePPCPreinc
) return false;
// Pull the pointer, memory type and alignment out of the load or store node.
2543 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
2544 Ptr
= LD
->getBasePtr();
2545 VT
= LD
->getMemoryVT();
2546 Alignment
= LD
->getAlignment();
2547 } else if (StoreSDNode
*ST
= dyn_cast
<StoreSDNode
>(N
)) {
2548 Ptr
= ST
->getBasePtr();
2549 VT
= ST
->getMemoryVT();
2550 Alignment
= ST
->getAlignment();
2555 // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
2556 // instructions because we can fold these into a more efficient instruction
2557 // instead, (such as LXSD).
2558 if (isLoad
&& usePartialVectorLoads(N
, Subtarget
)) {
2562 // PowerPC doesn't have preinc load/store instructions for vectors (except
2563 // for QPX, which does have preinc r+r forms).
2564 if (VT
.isVector()) {
2565 if (!Subtarget
.hasQPX() || (VT
!= MVT::v4f64
&& VT
!= MVT::v4f32
)) {
// Note the argument order: Offset is passed before Base in this call, the
// opposite of the SelectAddressRegReg call further down.
2567 } else if (SelectAddressRegRegOnly(Ptr
, Offset
, Base
, DAG
)) {
2573 if (SelectAddressRegReg(Ptr
, Base
, Offset
, DAG
)) {
2574 // Common code will reject creating a pre-inc form if the base pointer
2575 // is a frame index, or if N is a store and the base pointer is either
2576 // the same as or a predecessor of the value being stored.  Check for
2577 // those situations here, and try with swapped Base/Offset instead.
2580 if (isa
<FrameIndexSDNode
>(Base
) || isa
<RegisterSDNode
>(Base
))
2583 SDValue Val
= cast
<StoreSDNode
>(N
)->getValue();
2584 if (Val
== Base
|| Base
.getNode()->isPredecessorOf(Val
.getNode()))
2589 std::swap(Base
, Offset
);
2595 // LDU/STU can only handle immediates that are a multiple of 4.
// Types other than i64 put no alignment restriction on the displacement
// (EncodingAlignment argument 0); the second call below passes 4.
2596 if (VT
!= MVT::i64
) {
2597 if (!SelectAddressRegImm(Ptr
, Offset
, Base
, DAG
, 0))
2600 // LDU/STU need an address with at least 4-byte alignment.
2604 if (!SelectAddressRegImm(Ptr
, Offset
, Base
, DAG
, 4))
2608 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(N
)) {
2609 // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
2610 // sext i32 to i64 when addr mode is r+i.
2611 if (LD
->getValueType(0) == MVT::i64
&& LD
->getMemoryVT() == MVT::i32
&&
2612 LD
->getExtensionType() == ISD::SEXTLOAD
&&
2613 isa
<ConstantSDNode
>(Offset
))
2621 //===----------------------------------------------------------------------===//
2622 // LowerOperation implementation
2623 //===----------------------------------------------------------------------===//
2625 /// Set HiOpFlags and LoOpFlags to the target MO flags used to reference a
2626 /// label. The function returns void; results come back through both references.
2627 static void getLabelAccessInfo(bool IsPIC
, const PPCSubtarget
&Subtarget
,
2628 unsigned &HiOpFlags
, unsigned &LoOpFlags
,
2629 const GlobalValue
*GV
= nullptr) {
// Start from the plain high-adjusted / low relocation flags.
2630 HiOpFlags
= PPCII::MO_HA
;
2631 LoOpFlags
= PPCII::MO_LO
;
2633 // Don't use the pic base if not in PIC relocation model.
2635 HiOpFlags
|= PPCII::MO_PIC_FLAG
;
2636 LoOpFlags
|= PPCII::MO_PIC_FLAG
;
2639 // If this is a reference to a global value that requires a non-lazy-ptr, make
2640 // sure that instruction lowering adds it.
// GV is optional (it defaults to nullptr), hence the null check.
2641 if (GV
&& Subtarget
.hasLazyResolverStub(GV
)) {
2642 HiOpFlags
|= PPCII::MO_NLP_FLAG
;
2643 LoOpFlags
|= PPCII::MO_NLP_FLAG
;
// Hidden-visibility globals additionally get the NLP hidden flag.
2645 if (GV
->hasHiddenVisibility()) {
2646 HiOpFlags
|= PPCII::MO_NLP_HIDDEN_FLAG
;
2647 LoOpFlags
|= PPCII::MO_NLP_HIDDEN_FLAG
;
// Builds the address computation for a label from its two target nodes:
// PPCISD::Hi(HiPart, 0) and PPCISD::Lo(LoPart, 0) are created and then added
// together. All nodes use HiPart's value type.
2652 static SDValue
LowerLabelRef(SDValue HiPart
, SDValue LoPart
, bool isPIC
,
2653 SelectionDAG
&DAG
) {
2655 EVT PtrVT
= HiPart
.getValueType();
2656 SDValue Zero
= DAG
.getConstant(0, DL
, PtrVT
);
2658 SDValue Hi
= DAG
.getNode(PPCISD::Hi
, DL
, PtrVT
, HiPart
, Zero
);
2659 SDValue Lo
= DAG
.getNode(PPCISD::Lo
, DL
, PtrVT
, LoPart
, Zero
);
2661 // With PIC, the first instruction is actually "GR+hi(&G)".
// i.e. the Hi node is replaced by (GlobalBaseReg + Hi).
2663 Hi
= DAG
.getNode(ISD::ADD
, DL
, PtrVT
,
2664 DAG
.getNode(PPCISD::GlobalBaseReg
, DL
, PtrVT
), Hi
);
2666 // Generate non-pic code that has direct accesses to the constant pool.
2667 // The address of the global is just (hi(&g)+lo(&g)).
2668 return DAG
.getNode(ISD::ADD
, DL
, PtrVT
, Hi
, Lo
);
// Calls setUsesTOCBasePtr() on the PPCFunctionInfo attached to MF.
2671 static void setUsesTOCBasePtr(MachineFunction
&MF
) {
2672 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
2673 FuncInfo
->setUsesTOCBasePtr();
// Convenience overload: forwards to the MachineFunction overload using the
// function currently associated with DAG.
2676 static void setUsesTOCBasePtr(SelectionDAG
&DAG
) {
2677 setUsesTOCBasePtr(DAG
.getMachineFunction());
// Creates a PPCISD::TOC_ENTRY memory-intrinsic node for GA. The node has
// operands {GA, Reg}, produces (VT, MVT::Other), and is marked as a load from
// the GOT.
2680 SDValue
PPCTargetLowering::getTOCEntry(SelectionDAG
&DAG
, const SDLoc
&dl
,
2682 const bool Is64Bit
= Subtarget
.isPPC64();
2683 EVT VT
= Is64Bit
? MVT::i64
: MVT::i32
;
// Second operand: X2 on 64-bit targets, R2 on the (32-bit) AIX ABI, and a
// GlobalBaseReg node in the remaining case.
2684 SDValue Reg
= Is64Bit
? DAG
.getRegister(PPC::X2
, VT
)
2685 : Subtarget
.isAIXABI()
2686 ? DAG
.getRegister(PPC::R2
, VT
)
2687 : DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, VT
);
2688 SDValue Ops
[] = { GA
, Reg
};
2689 return DAG
.getMemIntrinsicNode(
2690 PPCISD::TOC_ENTRY
, dl
, DAG
.getVTList(VT
, MVT::Other
), Ops
, VT
,
2691 MachinePointerInfo::getGOT(DAG
.getMachineFunction()), 0,
2692 MachineMemOperand::MOLoad
);
// Lowers a ConstantPool node. Three strategies appear below: a TOC entry on the
// 64-bit ELF ABI, a TOC entry carrying MO_PIC_FLAG for PIC code on SVR4, and a
// Hi/Lo label reference built by LowerLabelRef otherwise.
2695 SDValue
PPCTargetLowering::LowerConstantPool(SDValue Op
,
2696 SelectionDAG
&DAG
) const {
2697 EVT PtrVT
= Op
.getValueType();
2698 ConstantPoolSDNode
*CP
= cast
<ConstantPoolSDNode
>(Op
);
2699 const Constant
*C
= CP
->getConstVal();
2701 // 64-bit SVR4 ABI code is always position-independent.
2702 // The actual address of the GlobalValue is stored in the TOC.
2703 if (Subtarget
.is64BitELFABI()) {
2704 setUsesTOCBasePtr(DAG
);
2705 SDValue GA
= DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0);
2706 return getTOCEntry(DAG
, SDLoc(CP
), GA
);
// No GlobalValue is passed here, so getLabelAccessInfo's non-lazy-pointer
// handling is not triggered.
2709 unsigned MOHiFlag
, MOLoFlag
;
2710 bool IsPIC
= isPositionIndependent();
2711 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2713 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2714 SDValue GA
= DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(),
2715 PPCII::MO_PIC_FLAG
);
2716 return getTOCEntry(DAG
, SDLoc(CP
), GA
);
// Two target constant-pool nodes for the same constant, differing only in
// their high/low operand flags.
2720 DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0, MOHiFlag
);
2722 DAG
.getTargetConstantPool(C
, PtrVT
, CP
->getAlignment(), 0, MOLoFlag
);
2723 return LowerLabelRef(CPIHi
, CPILo
, IsPIC
, DAG
);
2726 // For 64-bit PowerPC, prefer the more compact relative encodings.
2727 // This trades 32 bits per jump table entry for one or two instructions
2728 // on the jump site.
2729 unsigned PPCTargetLowering::getJumpTableEncoding() const {
// Relative jump tables use 32-bit label differences; otherwise defer to the
// generic TargetLowering choice.
2730 if (isJumpTableRelative())
2731 return MachineJumpTableInfo::EK_LabelDifference32
;
2733 return TargetLowering::getJumpTableEncoding();
// Reports whether jump tables are relative. PPC64 is special-cased (the
// statement under the check is on a line not shown in this excerpt); other
// subtargets defer to TargetLowering::isJumpTableRelative().
2736 bool PPCTargetLowering::isJumpTableRelative() const {
2737 if (Subtarget
.isPPC64())
2739 return TargetLowering::isJumpTableRelative();
// Returns the node used as the relocation base for PIC jump tables. Non-PPC64
// subtargets and the Small/Medium code models use the generic TargetLowering
// implementation; the remaining path produces a PPCISD::GlobalBaseReg node of
// pointer type.
2742 SDValue
PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table
,
2743 SelectionDAG
&DAG
) const {
2744 if (!Subtarget
.isPPC64())
2745 return TargetLowering::getPICJumpTableRelocBase(Table
, DAG
);
2747 switch (getTargetMachine().getCodeModel()) {
2748 case CodeModel::Small
:
2749 case CodeModel::Medium
:
2750 return TargetLowering::getPICJumpTableRelocBase(Table
, DAG
);
2752 return DAG
.getNode(PPCISD::GlobalBaseReg
, SDLoc(),
2753 getPointerTy(DAG
.getDataLayout()));
// MC-expression counterpart of getPICJumpTableRelocBase, with the same
// dispatch: non-PPC64 and the Small/Medium code models defer to TargetLowering;
// the remaining path returns a symbol reference to the function's PIC base
// symbol.
2758 PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction
*MF
,
2760 MCContext
&Ctx
) const {
2761 if (!Subtarget
.isPPC64())
2762 return TargetLowering::getPICJumpTableRelocBaseExpr(MF
, JTI
, Ctx
);
2764 switch (getTargetMachine().getCodeModel()) {
2765 case CodeModel::Small
:
2766 case CodeModel::Medium
:
2767 return TargetLowering::getPICJumpTableRelocBaseExpr(MF
, JTI
, Ctx
);
2769 return MCSymbolRefExpr::create(MF
->getPICBaseSymbol(), Ctx
);
// Lowers a JumpTable node. The structure mirrors LowerConstantPool: TOC entry
// on the 64-bit ELF ABI, TOC entry with MO_PIC_FLAG for PIC on SVR4, and a
// Hi/Lo label reference otherwise.
2773 SDValue
PPCTargetLowering::LowerJumpTable(SDValue Op
, SelectionDAG
&DAG
) const {
2774 EVT PtrVT
= Op
.getValueType();
2775 JumpTableSDNode
*JT
= cast
<JumpTableSDNode
>(Op
);
2777 // 64-bit SVR4 ABI code is always position-independent.
2778 // The actual address of the GlobalValue is stored in the TOC.
2779 if (Subtarget
.is64BitELFABI()) {
2780 setUsesTOCBasePtr(DAG
);
2781 SDValue GA
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
);
2782 return getTOCEntry(DAG
, SDLoc(JT
), GA
);
2785 unsigned MOHiFlag
, MOLoFlag
;
2786 bool IsPIC
= isPositionIndependent();
2787 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2789 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2790 SDValue GA
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
,
2791 PPCII::MO_PIC_FLAG
);
// NOTE(review): the location here is taken from GA, whereas the 64-bit path
// above uses JT.
2792 return getTOCEntry(DAG
, SDLoc(GA
), GA
);
// Same jump-table index twice, once with the high and once with the low
// operand flags.
2795 SDValue JTIHi
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
, MOHiFlag
);
2796 SDValue JTILo
= DAG
.getTargetJumpTable(JT
->getIndex(), PtrVT
, MOLoFlag
);
2797 return LowerLabelRef(JTIHi
, JTILo
, IsPIC
, DAG
);
// Lowers a BlockAddress node: TOC entry on the 64-bit ELF ABI, a dedicated
// path for 32-bit PIC ELF, and a Hi/Lo label reference otherwise.
2800 SDValue
PPCTargetLowering::LowerBlockAddress(SDValue Op
,
2801 SelectionDAG
&DAG
) const {
2802 EVT PtrVT
= Op
.getValueType();
2803 BlockAddressSDNode
*BASDN
= cast
<BlockAddressSDNode
>(Op
);
2804 const BlockAddress
*BA
= BASDN
->getBlockAddress();
2806 // 64-bit SVR4 ABI code is always position-independent.
2807 // The actual BlockAddress is stored in the TOC.
2808 if (Subtarget
.is64BitELFABI()) {
2809 setUsesTOCBasePtr(DAG
);
2810 SDValue GA
= DAG
.getTargetBlockAddress(BA
, PtrVT
, BASDN
->getOffset());
2811 return getTOCEntry(DAG
, SDLoc(BASDN
), GA
);
2814 // 32-bit position-independent ELF stores the BlockAddress in the .got.
2815 if (Subtarget
.is32BitELFABI() && isPositionIndependent())
2818 DAG
.getTargetBlockAddress(BA
, PtrVT
, BASDN
->getOffset()));
// The Hi/Lo pair below is created with offset 0 rather than
// BASDN->getOffset().
2820 unsigned MOHiFlag
, MOLoFlag
;
2821 bool IsPIC
= isPositionIndependent();
2822 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
);
2823 SDValue TgtBAHi
= DAG
.getTargetBlockAddress(BA
, PtrVT
, 0, MOHiFlag
);
2824 SDValue TgtBALo
= DAG
.getTargetBlockAddress(BA
, PtrVT
, 0, MOLoFlag
);
2825 return LowerLabelRef(TgtBAHi
, TgtBALo
, IsPIC
, DAG
);
// Lowers a thread-local global address. Emulated TLS is delegated to
// LowerToTLSEmulatedModel; otherwise the sequence is chosen by the TLS model
// the target machine reports for the global (LocalExec, InitialExec,
// GeneralDynamic, LocalDynamic). Any other model hits llvm_unreachable.
2828 SDValue
PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op
,
2829 SelectionDAG
&DAG
) const {
2830 // FIXME: TLS addresses currently use medium model code sequences,
2831 // which is the most useful form.  Eventually support for small and
2832 // large models could be added if users need it, at the cost of
2833 // additional complexity.
2834 GlobalAddressSDNode
*GA
= cast
<GlobalAddressSDNode
>(Op
);
2835 if (DAG
.getTarget().useEmulatedTLS())
2836 return LowerToTLSEmulatedModel(GA
, DAG
);
2839 const GlobalValue
*GV
= GA
->getGlobal();
2840 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
2841 bool is64bit
= Subtarget
.isPPC64();
2842 const Module
*M
= DAG
.getMachineFunction().getFunction().getParent();
2843 PICLevel::Level picLevel
= M
->getPICLevel();
2845 const TargetMachine
&TM
= getTargetMachine();
2846 TLSModel::Model Model
= TM
.getTLSModel(GV
);
// LocalExec: Lo(TGALo, Hi(TGAHi, TLSReg)), where the two target addresses carry
// the TPREL_HA / TPREL_LO flags and TLSReg is X13 (64-bit) or R2 (32-bit).
2848 if (Model
== TLSModel::LocalExec
) {
2849 SDValue TGAHi
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
2850 PPCII::MO_TPREL_HA
);
2851 SDValue TGALo
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
2852 PPCII::MO_TPREL_LO
);
2853 SDValue TLSReg
= is64bit
? DAG
.getRegister(PPC::X13
, MVT::i64
)
2854 : DAG
.getRegister(PPC::R2
, MVT::i32
);
2856 SDValue Hi
= DAG
.getNode(PPCISD::Hi
, dl
, PtrVT
, TGAHi
, TLSReg
);
2857 return DAG
.getNode(PPCISD::Lo
, dl
, PtrVT
, TGALo
, Hi
);
// InitialExec: the result is ADD_TLS(LD_GOT_TPREL_L(TGA, GOTPtr), TGATLS); the
// code in between only decides how GOTPtr is formed.
2860 if (Model
== TLSModel::InitialExec
) {
2861 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2862 SDValue TGATLS
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0,
// GOT pointer formed from X2 via ADDIS_GOT_TPREL_HA.
2866 setUsesTOCBasePtr(DAG
);
2867 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2868 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_GOT_TPREL_HA
, dl
,
2869 PtrVT
, GOTReg
, TGA
);
// Alternative GOT pointer sources: PPC32_GOT when not position independent,
// GlobalBaseReg for the small PIC level, PPC32_PICGOT otherwise.
2871 if (!TM
.isPositionIndependent())
2872 GOTPtr
= DAG
.getNode(PPCISD::PPC32_GOT
, dl
, PtrVT
);
2873 else if (picLevel
== PICLevel::SmallPIC
)
2874 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2876 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2878 SDValue TPOffset
= DAG
.getNode(PPCISD::LD_GOT_TPREL_L
, dl
,
2879 PtrVT
, TGA
, GOTPtr
);
2880 return DAG
.getNode(PPCISD::ADD_TLS
, dl
, PtrVT
, TPOffset
, TGATLS
);
// GeneralDynamic: returns an ADDI_TLSGD_L_ADDR node; its operands are on lines
// not shown in this excerpt.
2883 if (Model
== TLSModel::GeneralDynamic
) {
2884 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2887 setUsesTOCBasePtr(DAG
);
2888 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2889 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_TLSGD_HA
, dl
, PtrVT
,
2892 if (picLevel
== PICLevel::SmallPIC
)
2893 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2895 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2897 return DAG
.getNode(PPCISD::ADDI_TLSGD_L_ADDR
, dl
, PtrVT
,
// LocalDynamic: ADDI_TLSLD_L_ADDR produces the TLS address, then
// ADDIS_DTPREL_HA and ADDI_DTPREL_L are applied with the variable's target
// address.
2901 if (Model
== TLSModel::LocalDynamic
) {
2902 SDValue TGA
= DAG
.getTargetGlobalAddress(GV
, dl
, PtrVT
, 0, 0);
2905 setUsesTOCBasePtr(DAG
);
2906 SDValue GOTReg
= DAG
.getRegister(PPC::X2
, MVT::i64
);
2907 GOTPtr
= DAG
.getNode(PPCISD::ADDIS_TLSLD_HA
, dl
, PtrVT
,
2910 if (picLevel
== PICLevel::SmallPIC
)
2911 GOTPtr
= DAG
.getNode(PPCISD::GlobalBaseReg
, dl
, PtrVT
);
2913 GOTPtr
= DAG
.getNode(PPCISD::PPC32_PICGOT
, dl
, PtrVT
);
2915 SDValue TLSAddr
= DAG
.getNode(PPCISD::ADDI_TLSLD_L_ADDR
, dl
,
2916 PtrVT
, GOTPtr
, TGA
, TGA
);
2917 SDValue DtvOffsetHi
= DAG
.getNode(PPCISD::ADDIS_DTPREL_HA
, dl
,
2918 PtrVT
, TLSAddr
, TGA
);
2919 return DAG
.getNode(PPCISD::ADDI_DTPREL_L
, dl
, PtrVT
, DtvOffsetHi
, TGA
);
2922 llvm_unreachable("Unknown TLS model!");
// Lowers a GlobalAddress node: TOC entry on the 64-bit ELF and AIX ABIs, TOC
// entry with MO_PIC_FLAG for PIC on SVR4, otherwise a Hi/Lo label reference,
// followed by an extra load when the non-lazy-pointer flag was set.
2925 SDValue
PPCTargetLowering::LowerGlobalAddress(SDValue Op
,
2926 SelectionDAG
&DAG
) const {
2927 EVT PtrVT
= Op
.getValueType();
2928 GlobalAddressSDNode
*GSDN
= cast
<GlobalAddressSDNode
>(Op
);
2930 const GlobalValue
*GV
= GSDN
->getGlobal();
2932 // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
2933 // The actual address of the GlobalValue is stored in the TOC.
2934 if (Subtarget
.is64BitELFABI() || Subtarget
.isAIXABI()) {
2935 setUsesTOCBasePtr(DAG
);
2936 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset());
2937 return getTOCEntry(DAG
, DL
, GA
);
// GV is passed through so getLabelAccessInfo can add the non-lazy-pointer
// flags for this global.
2940 unsigned MOHiFlag
, MOLoFlag
;
2941 bool IsPIC
= isPositionIndependent();
2942 getLabelAccessInfo(IsPIC
, Subtarget
, MOHiFlag
, MOLoFlag
, GV
);
2944 if (IsPIC
&& Subtarget
.isSVR4ABI()) {
2945 SDValue GA
= DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
,
2947 PPCII::MO_PIC_FLAG
);
2948 return getTOCEntry(DAG
, DL
, GA
);
// Same global and offset twice, with the high and the low operand flags.
2952 DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset(), MOHiFlag
);
2954 DAG
.getTargetGlobalAddress(GV
, DL
, PtrVT
, GSDN
->getOffset(), MOLoFlag
);
2956 SDValue Ptr
= LowerLabelRef(GAHi
, GALo
, IsPIC
, DAG
);
2958 // If the global reference is actually to a non-lazy-pointer, we have to do an
2959 // extra load to get the address of the global.
2960 if (MOHiFlag
& PPCII::MO_NLP_FLAG
)
2961 Ptr
= DAG
.getLoad(PtrVT
, DL
, DAG
.getEntryNode(), Ptr
, MachinePointerInfo());
// Custom lowering for SETCC. The visible special cases are: v2i64 results with
// v2i64 operands, equality-to-zero (via lowerCmpEqZeroToCtlzSrl), comparisons
// against the constants 0 / -1, and integer seteq/setne, which is rewritten as
// a comparison of (lhs xor rhs) against zero.
2965 SDValue
PPCTargetLowering::LowerSETCC(SDValue Op
, SelectionDAG
&DAG
) const {
2966 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(2))->get();
2969 if (Op
.getValueType() == MVT::v2i64
) {
2970 // When the operands themselves are v2i64 values, we need to do something
2971 // special because VSX has no underlying comparison operations for these.
2972 if (Op
.getOperand(0).getValueType() == MVT::v2i64
) {
2973 // Equality can be handled by casting to the legal type for Altivec
2974 // comparisons, everything else needs to be expanded.
2975 if (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) {
// Both operands are bitcast to v4i32, compared there, and the result is
// bitcast back to v2i64.
2976 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
,
2977 DAG
.getSetCC(dl
, MVT::v4i32
,
2978 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, Op
.getOperand(0)),
2979 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, Op
.getOperand(1)),
2986 // We handle most of these in the usual way.
2990 // If we're comparing for equality to zero, expose the fact that this is
2991 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
2992 // fold the new nodes.
2993 if (SDValue V
= lowerCmpEqZeroToCtlzSrl(Op
, DAG
))
2996 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(1))) {
2997 // Leave comparisons against 0 and -1 alone for now, since they're usually
2998 // optimized.  FIXME: revisit this when we can custom lower all setcc
3000 if (C
->isAllOnesValue() || C
->isNullValue())
3004 // If we have an integer seteq/setne, turn it into a compare against zero
3005 // by xor'ing the rhs with the lhs, which is faster than setting a
3006 // condition register, reading it back out, and masking the correct bit.  The
3007 // normal approach here uses sub to do this instead of xor.  Using xor exposes
3008 // the result to other bit-twiddling opportunities.
3009 EVT LHSVT
= Op
.getOperand(0).getValueType();
3010 if (LHSVT
.isInteger() && (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
)) {
3011 EVT VT
= Op
.getValueType();
// Despite its name, `Sub` holds an XOR node.
3012 SDValue Sub
= DAG
.getNode(ISD::XOR
, dl
, LHSVT
, Op
.getOperand(0),
3014 return DAG
.getSetCC(dl
, VT
, Sub
, DAG
.getConstant(0, dl
, LHSVT
), CC
);
// Lowers VAARG for 32-bit PowerPC (asserted below). Byte offsets read from the
// va_list in this function: +0 gpr index (i8), +1 fpr index (i8), +4 overflow
// area pointer, +8 register save area pointer. The argument address is picked
// between the register save area and the overflow area, the bookkeeping fields
// are written back, and the value is finally loaded from the chosen address.
3019 SDValue
PPCTargetLowering::LowerVAARG(SDValue Op
, SelectionDAG
&DAG
) const {
3020 SDNode
*Node
= Op
.getNode();
3021 EVT VT
= Node
->getValueType(0);
3022 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3023 SDValue InChain
= Node
->getOperand(0);
3024 SDValue VAListPtr
= Node
->getOperand(1);
3025 const Value
*SV
= cast
<SrcValueSDNode
>(Node
->getOperand(2))->getValue();
3028 assert(!Subtarget
.isPPC64() && "LowerVAARG is PPC32 only");
// gpr index: zero-extending i8 load from the start of the va_list. Every load
// below threads its output chain (value 1) into InChain.
3031 SDValue GprIndex
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, MVT::i32
, InChain
,
3032 VAListPtr
, MachinePointerInfo(SV
), MVT::i8
);
3033 InChain
= GprIndex
.getValue(1);
3035 if (VT
== MVT::i64
) {
3036 // Check if GprIndex is even
// CC64 is true when the low bit is set, i.e. when the index is odd.
3037 SDValue GprAnd
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, GprIndex
,
3038 DAG
.getConstant(1, dl
, MVT::i32
));
3039 SDValue CC64
= DAG
.getSetCC(dl
, MVT::i32
, GprAnd
,
3040 DAG
.getConstant(0, dl
, MVT::i32
), ISD::SETNE
);
3041 SDValue GprIndexPlusOne
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, GprIndex
,
3042 DAG
.getConstant(1, dl
, MVT::i32
));
3043 // Align GprIndex to be even if it isn't
3044 GprIndex
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, CC64
, GprIndexPlusOne
,
3048 // fpr index is 1 byte after gpr
3049 SDValue FprPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3050 DAG
.getConstant(1, dl
, MVT::i32
));
3053 SDValue FprIndex
= DAG
.getExtLoad(ISD::ZEXTLOAD
, dl
, MVT::i32
, InChain
,
3054 FprPtr
, MachinePointerInfo(SV
), MVT::i8
);
3055 InChain
= FprIndex
.getValue(1);
// Addresses of the two pointer fields: register save area at +8, overflow
// area at +4.
3057 SDValue RegSaveAreaPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3058 DAG
.getConstant(8, dl
, MVT::i32
));
3060 SDValue OverflowAreaPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, VAListPtr
,
3061 DAG
.getConstant(4, dl
, MVT::i32
));
3064 SDValue OverflowArea
=
3065 DAG
.getLoad(MVT::i32
, dl
, InChain
, OverflowAreaPtr
, MachinePointerInfo());
3066 InChain
= OverflowArea
.getValue(1);
3068 SDValue RegSaveArea
=
3069 DAG
.getLoad(MVT::i32
, dl
, InChain
, RegSaveAreaPtr
, MachinePointerInfo());
3070 InChain
= RegSaveArea
.getValue(1);
3072 // CC is true while index < 8 (register save area); overflow_area otherwise
// Integer types consult the gpr index, all other types the fpr index.
3073 SDValue CC
= DAG
.getSetCC(dl
, MVT::i32
, VT
.isInteger() ? GprIndex
: FprIndex
,
3074 DAG
.getConstant(8, dl
, MVT::i32
), ISD::SETLT
);
3076 // adjustment constant gpr_index * 4/8
3077 SDValue RegConstant
= DAG
.getNode(ISD::MUL
, dl
, MVT::i32
,
3078 VT
.isInteger() ? GprIndex
: FprIndex
,
3079 DAG
.getConstant(VT
.isInteger() ? 4 : 8, dl
,
3082 // OurReg = RegSaveArea + RegConstant
3083 SDValue OurReg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, RegSaveArea
,
3086 // Floating types are 32 bytes into RegSaveArea
3087 if (VT
.isFloatingPoint())
3088 OurReg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, OurReg
,
3089 DAG
.getConstant(32, dl
, MVT::i32
));
3091 // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3092 SDValue IndexPlus1
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
,
3093 VT
.isInteger() ? GprIndex
: FprIndex
,
3094 DAG
.getConstant(VT
== MVT::i64
? 2 : 1, dl
,
// Write the bumped index back as a single byte to the field it was read from.
3097 InChain
= DAG
.getTruncStore(InChain
, dl
, IndexPlus1
,
3098 VT
.isInteger() ? VAListPtr
: FprPtr
,
3099 MachinePointerInfo(SV
), MVT::i8
);
3101 // determine if we should load from reg_save_area or overflow_area
3102 SDValue Result
= DAG
.getNode(ISD::SELECT
, dl
, PtrVT
, CC
, OurReg
, OverflowArea
);
3104 // increase overflow_area by 4/8 if gpr/fpr > 8
3105 SDValue OverflowAreaPlusN
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, OverflowArea
,
3106 DAG
.getConstant(VT
.isInteger() ? 4 : 8,
3109 OverflowArea
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i32
, CC
, OverflowArea
,
// Store the (possibly advanced) overflow pointer back into the va_list.
3112 InChain
= DAG
.getTruncStore(InChain
, dl
, OverflowArea
, OverflowAreaPtr
,
3113 MachinePointerInfo(), MVT::i32
);
// Finally load the argument itself from the selected address.
3115 return DAG
.getLoad(VT
, dl
, InChain
, Result
, MachinePointerInfo());
// Lowers VACOPY for 32-bit PowerPC (asserted below) as a 12-byte memcpy whose
// chain, destination and source are operands 0, 1 and 2 of Op.
3118 SDValue
PPCTargetLowering::LowerVACOPY(SDValue Op
, SelectionDAG
&DAG
) const {
3119 assert(!Subtarget
.isPPC64() && "LowerVACOPY is PPC32 only");
3121 // We have to copy the entire va_list struct:
3122 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
// NOTE(review): the trailing literal arguments (8, false, true, false) are
// positional getMemcpy options; confirm their meaning against the
// SelectionDAG::getMemcpy declaration for this LLVM revision.
3123 return DAG
.getMemcpy(Op
.getOperand(0), Op
,
3124 Op
.getOperand(1), Op
.getOperand(2),
3125 DAG
.getConstant(12, SDLoc(Op
), MVT::i32
), 8, false, true,
3126 false, MachinePointerInfo(), MachinePointerInfo());
// Lowers ADJUST_TRAMPOLINE by returning operand 0 of Op unchanged; DAG is not
// used.
3129 SDValue
PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op
,
3130 SelectionDAG
&DAG
) const {
3131 return Op
.getOperand(0);
// Lowers INIT_TRAMPOLINE to a call to the external symbol __trampoline_setup
// with four pointer-sized integer arguments: the trampoline, its size, the
// nested function and the 'nest' value. The second element of the
// LowerCallTo result pair is returned.
3134 SDValue
PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op
,
3135 SelectionDAG
&DAG
) const {
3136 SDValue Chain
= Op
.getOperand(0);
3137 SDValue Trmp
= Op
.getOperand(1); // trampoline
3138 SDValue FPtr
= Op
.getOperand(2); // nested function
3139 SDValue Nest
= Op
.getOperand(3); // 'nest' parameter value
3142 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
3143 bool isPPC64
= (PtrVT
== MVT::i64
);
3144 Type
*IntPtrTy
= DAG
.getDataLayout().getIntPtrType(*DAG
.getContext());
3146 TargetLowering::ArgListTy Args
;
3147 TargetLowering::ArgListEntry Entry
;
// A single Entry object is reused for all arguments: the type is set once and
// only the node changes before each push_back.
3149 Entry
.Ty
= IntPtrTy
;
3150 Entry
.Node
= Trmp
; Args
.push_back(Entry
);
3152 // TrampSize == (isPPC64 ? 48 : 40);
3153 Entry
.Node
= DAG
.getConstant(isPPC64
? 48 : 40, dl
,
3154 isPPC64
? MVT::i64
: MVT::i32
);
3155 Args
.push_back(Entry
);
3157 Entry
.Node
= FPtr
; Args
.push_back(Entry
);
3158 Entry
.Node
= Nest
; Args
.push_back(Entry
);
3160 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
// The callee uses the C calling convention and a void return type.
3161 TargetLowering::CallLoweringInfo
CLI(DAG
);
3162 CLI
.setDebugLoc(dl
).setChain(Chain
).setLibCallee(
3163 CallingConv::C
, Type::getVoidTy(*DAG
.getContext()),
3164 DAG
.getExternalSymbol("__trampoline_setup", PtrVT
), std::move(Args
));
3166 std::pair
<SDValue
, SDValue
> CallResult
= LowerCallTo(CLI
);
3167 return CallResult
.second
;
// Lowers VASTART. On Darwin and on PPC64 a single store of the varargs frame
// index address is emitted. Otherwise (32-bit SVR4) the four va_list fields
// are written by a chain of four stores, described by the struct sketch below.
3170 SDValue
PPCTargetLowering::LowerVASTART(SDValue Op
, SelectionDAG
&DAG
) const {
3171 MachineFunction
&MF
= DAG
.getMachineFunction();
3172 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3173 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3177 if (Subtarget
.isDarwinABI() || Subtarget
.isPPC64()) {
3178 // vastart just stores the address of the VarArgsFrameIndex slot into the
3179 // memory location argument.
3180 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
3181 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
3182 return DAG
.getStore(Op
.getOperand(0), dl
, FR
, Op
.getOperand(1),
3183 MachinePointerInfo(SV
));
3186 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3187 // We suppose the given va_list is already allocated.
3190 //  char gpr;     /* index into the array of 8 GPRs
3191 //                 * stored in the register save area
3192 //                 * gpr=0 corresponds to r3,
3193 //                 * gpr=1 to r4, etc.
3195 //  char fpr;     /* index into the array of 8 FPRs
3196 //                 * stored in the register save area
3197 //                 * fpr=0 corresponds to f1,
3198 //                 * fpr=1 to f2, etc.
3200 //  char *overflow_arg_area;
3201 //                /* location on stack that holds
3202 //                 * the next overflow argument
3204 //  char *reg_save_area;
3205 //               /* where r3:r10 and f1:f8 (if saved)
// Values to be stored: the GPR and FPR counts as i32 constants, and frame
// indices for the stack-argument area and the register save area.
3210 SDValue ArgGPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumGPR(), dl
, MVT::i32
);
3211 SDValue ArgFPR
= DAG
.getConstant(FuncInfo
->getVarArgsNumFPR(), dl
, MVT::i32
);
3212 SDValue StackOffsetFI
= DAG
.getFrameIndex(FuncInfo
->getVarArgsStackOffset(),
3214 SDValue FR
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(),
// Pointer increments between consecutive fields: +1 from gpr to fpr, then
// +(pointer size - 1) to the first pointer field, then +(pointer size) to the
// second.
3217 uint64_t FrameOffset
= PtrVT
.getSizeInBits()/8;
3218 SDValue ConstFrameOffset
= DAG
.getConstant(FrameOffset
, dl
, PtrVT
);
3220 uint64_t StackOffset
= PtrVT
.getSizeInBits()/8 - 1;
3221 SDValue ConstStackOffset
= DAG
.getConstant(StackOffset
, dl
, PtrVT
);
3223 uint64_t FPROffset
= 1;
3224 SDValue ConstFPROffset
= DAG
.getConstant(FPROffset
, dl
, PtrVT
);
3226 const Value
*SV
= cast
<SrcValueSDNode
>(Op
.getOperand(2))->getValue();
3228 // Store first byte : number of int regs
3229 SDValue firstStore
=
3230 DAG
.getTruncStore(Op
.getOperand(0), dl
, ArgGPR
, Op
.getOperand(1),
3231 MachinePointerInfo(SV
), MVT::i8
);
// nextOffset tracks the running byte offset, used for the pointer info of the
// later stores.
3232 uint64_t nextOffset
= FPROffset
;
3233 SDValue nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Op
.getOperand(1),
3236 // Store second byte : number of float regs
3237 SDValue secondStore
=
3238 DAG
.getTruncStore(firstStore
, dl
, ArgFPR
, nextPtr
,
3239 MachinePointerInfo(SV
, nextOffset
), MVT::i8
);
3240 nextOffset
+= StackOffset
;
3241 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstStackOffset
);
3243 // Store second word : arguments given on stack
3244 SDValue thirdStore
= DAG
.getStore(secondStore
, dl
, StackOffsetFI
, nextPtr
,
3245 MachinePointerInfo(SV
, nextOffset
));
3246 nextOffset
+= FrameOffset
;
3247 nextPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, nextPtr
, ConstFrameOffset
);
3249 // Store third word : arguments given in registers
3250 return DAG
.getStore(thirdStore
, dl
, FR
, nextPtr
,
3251 MachinePointerInfo(SV
, nextOffset
));
3254 /// FPR - The set of FP registers that should be allocated for arguments
3255 /// on Darwin and AIX.
// Thirteen entries, F1 through F13, in ascending order.
3256 static const MCPhysReg FPR
[] = {PPC::F1
, PPC::F2
, PPC::F3
, PPC::F4
, PPC::F5
,
3257 PPC::F6
, PPC::F7
, PPC::F8
, PPC::F9
, PPC::F10
,
3258 PPC::F11
, PPC::F12
, PPC::F13
};
3260 /// QFPR - The set of QPX registers that should be allocated for arguments.
// Thirteen entries, QF1 through QF13, in ascending order.
3261 static const MCPhysReg QFPR
[] = {
3262 PPC::QF1
, PPC::QF2
, PPC::QF3
, PPC::QF4
, PPC::QF5
, PPC::QF6
, PPC::QF7
,
3263 PPC::QF8
, PPC::QF9
, PPC::QF10
, PPC::QF11
, PPC::QF12
, PPC::QF13
};
3265 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3267 static unsigned CalculateStackSlotSize(EVT ArgVT
, ISD::ArgFlagsTy Flags
,
3268 unsigned PtrByteSize
) {
// Start from the store size of the value type; by-value aggregates use the
// by-value size recorded in the flags instead.
3269 unsigned ArgSize
= ArgVT
.getStoreSize();
3270 if (Flags
.isByVal())
3271 ArgSize
= Flags
.getByValSize();
3273 // Round up to multiples of the pointer size, except for array members,
3274 // which are always packed.
3275 if (!Flags
.isInConsecutiveRegs())
3276 ArgSize
= ((ArgSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3281 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3283 static unsigned CalculateStackSlotAlignment(EVT ArgVT
, EVT OrigVT
,
3284 ISD::ArgFlagsTy Flags
,
3285 unsigned PtrByteSize
) {
// The default alignment is the pointer size; the cases below may override it.
3286 unsigned Align
= PtrByteSize
;
3288 // Altivec parameters are padded to a 16 byte boundary.
3289 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
3290 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
3291 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
||
3292 ArgVT
== MVT::v1i128
|| ArgVT
== MVT::f128
)
3294 // QPX vector types stored in double-precision are padded to a 32 byte
3296 else if (ArgVT
== MVT::v4f64
|| ArgVT
== MVT::v4i1
)
3299 // ByVal parameters are aligned as requested.
// Only by-value alignments larger than the pointer size are considered, and
// such an alignment is checked for being a multiple of the pointer size.
3300 if (Flags
.isByVal()) {
3301 unsigned BVAlign
= Flags
.getByValAlign();
3302 if (BVAlign
> PtrByteSize
) {
3303 if (BVAlign
% PtrByteSize
!= 0)
3305 "ByVal alignment is not a multiple of the pointer size");
3311 // Array members are always packed to their original alignment.
3312 if (Flags
.isInConsecutiveRegs()) {
3313 // If the array member was split into multiple registers, the first
3314 // needs to be aligned to the size of the full type.  (Except for
3315 // ppcf128, which is only aligned as its f64 components.)
3316 if (Flags
.isSplit() && OrigVT
!= MVT::ppcf128
)
3317 Align
= OrigVT
.getStoreSize();
3319 Align
= ArgVT
.getStoreSize();
3325 /// CalculateStackSlotUsed - Return whether this argument will use its
3326 /// stack slot (instead of being passed in registers). ArgOffset,
3327 /// AvailableFPRs, and AvailableVRs must hold the current argument
3328 /// position, and will be updated to account for this argument.
3329 static bool CalculateStackSlotUsed(EVT ArgVT
, EVT OrigVT
,
3330 ISD::ArgFlagsTy Flags
,
3331 unsigned PtrByteSize
,
3332 unsigned LinkageSize
,
3333 unsigned ParamAreaSize
,
3334 unsigned &ArgOffset
,
3335 unsigned &AvailableFPRs
,
3336 unsigned &AvailableVRs
, bool HasQPX
) {
3337 bool UseMemory
= false;
3339 // Respect alignment of argument on the stack.
3341 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
3342 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
3343 // If there's no space left in the argument save area, we must
3344 // use memory (this check also catches zero-sized arguments).
3345 if (ArgOffset
>= LinkageSize
+ ParamAreaSize
)
3348 // Allocate argument on the stack.
3349 ArgOffset
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
3350 if (Flags
.isInConsecutiveRegsLast())
3351 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3352 // If we overran the argument save area, we must use memory
3353 // (this check catches arguments passed partially in memory)
3354 if (ArgOffset
> LinkageSize
+ ParamAreaSize
)
3357 // However, if the argument is actually passed in an FPR or a VR,
3358 // we don't use memory after all.
3359 if (!Flags
.isByVal()) {
3360 if (ArgVT
== MVT::f32
|| ArgVT
== MVT::f64
||
3361 // QPX registers overlap with the scalar FP registers.
3362 (HasQPX
&& (ArgVT
== MVT::v4f32
||
3363 ArgVT
== MVT::v4f64
||
3364 ArgVT
== MVT::v4i1
)))
3365 if (AvailableFPRs
> 0) {
3369 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
3370 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
3371 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
||
3372 ArgVT
== MVT::v1i128
|| ArgVT
== MVT::f128
)
3373 if (AvailableVRs
> 0) {
3382 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3383 /// ensure minimum alignment required for target.
3384 static unsigned EnsureStackAlignment(const PPCFrameLowering
*Lowering
,
3385 unsigned NumBytes
) {
3386 unsigned TargetAlign
= Lowering
->getStackAlignment();
3387 unsigned AlignMask
= TargetAlign
- 1;
3388 NumBytes
= (NumBytes
+ AlignMask
) & ~AlignMask
;
3392 SDValue
PPCTargetLowering::LowerFormalArguments(
3393 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3394 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3395 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3396 if (Subtarget
.is64BitELFABI())
3397 return LowerFormalArguments_64SVR4(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3399 else if (Subtarget
.is32BitELFABI())
3400 return LowerFormalArguments_32SVR4(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3403 // FIXME: We are using this for both AIX and Darwin. We should add appropriate
3404 // AIX testing, and rename it appropriately.
3405 return LowerFormalArguments_Darwin(Chain
, CallConv
, isVarArg
, Ins
, dl
, DAG
,
3409 SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
3410 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3411 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3412 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3414 // 32-bit SVR4 ABI Stack Frame Layout:
3415 // +-----------------------------------+
3416 // +--> | Back chain |
3417 // | +-----------------------------------+
3418 // | | Floating-point register save area |
3419 // | +-----------------------------------+
3420 // | | General register save area |
3421 // | +-----------------------------------+
3422 // | | CR save word |
3423 // | +-----------------------------------+
3424 // | | VRSAVE save word |
3425 // | +-----------------------------------+
3426 // | | Alignment padding |
3427 // | +-----------------------------------+
3428 // | | Vector register save area |
3429 // | +-----------------------------------+
3430 // | | Local variable space |
3431 // | +-----------------------------------+
3432 // | | Parameter list area |
3433 // | +-----------------------------------+
3434 // | | LR save word |
3435 // | +-----------------------------------+
3436 // SP--> +--- | Back chain |
3437 // +-----------------------------------+
3440 // System V Application Binary Interface PowerPC Processor Supplement
3441 // AltiVec Technology Programming Interface Manual
3443 MachineFunction
&MF
= DAG
.getMachineFunction();
3444 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
3445 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3447 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3448 // Potential tail calls could cause overwriting of argument stack slots.
3449 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
3450 (CallConv
== CallingConv::Fast
));
3451 unsigned PtrByteSize
= 4;
3453 // Assign locations to all of the incoming arguments.
3454 SmallVector
<CCValAssign
, 16> ArgLocs
;
3455 PPCCCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), ArgLocs
,
3458 // Reserve space for the linkage area on the stack.
3459 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
3460 CCInfo
.AllocateStack(LinkageSize
, PtrByteSize
);
3462 CCInfo
.PreAnalyzeFormalArguments(Ins
);
3464 CCInfo
.AnalyzeFormalArguments(Ins
, CC_PPC32_SVR4
);
3465 CCInfo
.clearWasPPCF128();
3467 for (unsigned i
= 0, e
= ArgLocs
.size(); i
!= e
; ++i
) {
3468 CCValAssign
&VA
= ArgLocs
[i
];
3470 // Arguments stored in registers.
3471 if (VA
.isRegLoc()) {
3472 const TargetRegisterClass
*RC
;
3473 EVT ValVT
= VA
.getValVT();
3475 switch (ValVT
.getSimpleVT().SimpleTy
) {
3477 llvm_unreachable("ValVT not supported by formal arguments Lowering");
3480 RC
= &PPC::GPRCRegClass
;
3483 if (Subtarget
.hasP8Vector())
3484 RC
= &PPC::VSSRCRegClass
;
3485 else if (Subtarget
.hasSPE())
3486 RC
= &PPC::SPE4RCRegClass
;
3488 RC
= &PPC::F4RCRegClass
;
3491 if (Subtarget
.hasVSX())
3492 RC
= &PPC::VSFRCRegClass
;
3493 else if (Subtarget
.hasSPE())
3494 // SPE passes doubles in GPR pairs.
3495 RC
= &PPC::GPRCRegClass
;
3497 RC
= &PPC::F8RCRegClass
;
3502 RC
= &PPC::VRRCRegClass
;
3505 RC
= Subtarget
.hasQPX() ? &PPC::QSRCRegClass
: &PPC::VRRCRegClass
;
3509 RC
= &PPC::VRRCRegClass
;
3512 RC
= &PPC::QFRCRegClass
;
3515 RC
= &PPC::QBRCRegClass
;
3520 // Transform the arguments stored in physical registers into
3522 if (VA
.getLocVT() == MVT::f64
&& Subtarget
.hasSPE()) {
3523 assert(i
+ 1 < e
&& "No second half of double precision argument");
3524 unsigned RegLo
= MF
.addLiveIn(VA
.getLocReg(), RC
);
3525 unsigned RegHi
= MF
.addLiveIn(ArgLocs
[++i
].getLocReg(), RC
);
3526 SDValue ArgValueLo
= DAG
.getCopyFromReg(Chain
, dl
, RegLo
, MVT::i32
);
3527 SDValue ArgValueHi
= DAG
.getCopyFromReg(Chain
, dl
, RegHi
, MVT::i32
);
3528 if (!Subtarget
.isLittleEndian())
3529 std::swap (ArgValueLo
, ArgValueHi
);
3530 ArgValue
= DAG
.getNode(PPCISD::BUILD_SPE64
, dl
, MVT::f64
, ArgValueLo
,
3533 unsigned Reg
= MF
.addLiveIn(VA
.getLocReg(), RC
);
3534 ArgValue
= DAG
.getCopyFromReg(Chain
, dl
, Reg
,
3535 ValVT
== MVT::i1
? MVT::i32
: ValVT
);
3536 if (ValVT
== MVT::i1
)
3537 ArgValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, ArgValue
);
3540 InVals
.push_back(ArgValue
);
3542 // Argument stored in memory.
3543 assert(VA
.isMemLoc());
3545 // Get the extended size of the argument type in stack
3546 unsigned ArgSize
= VA
.getLocVT().getStoreSize();
3547 // Get the actual size of the argument type
3548 unsigned ObjSize
= VA
.getValVT().getStoreSize();
3549 unsigned ArgOffset
= VA
.getLocMemOffset();
3550 // Stack objects in PPC32 are right justified.
3551 ArgOffset
+= ArgSize
- ObjSize
;
3552 int FI
= MFI
.CreateFixedObject(ArgSize
, ArgOffset
, isImmutable
);
3554 // Create load nodes to retrieve arguments from the stack.
3555 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3557 DAG
.getLoad(VA
.getValVT(), dl
, Chain
, FIN
, MachinePointerInfo()));
3561 // Assign locations to all of the incoming aggregate by value arguments.
3562 // Aggregates passed by value are stored in the local variable space of the
3563 // caller's stack frame, right above the parameter list area.
3564 SmallVector
<CCValAssign
, 16> ByValArgLocs
;
3565 CCState
CCByValInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(),
3566 ByValArgLocs
, *DAG
.getContext());
3568 // Reserve stack space for the allocations in CCInfo.
3569 CCByValInfo
.AllocateStack(CCInfo
.getNextStackOffset(), PtrByteSize
);
3571 CCByValInfo
.AnalyzeFormalArguments(Ins
, CC_PPC32_SVR4_ByVal
);
3573 // Area that is at least reserved in the caller of this function.
3574 unsigned MinReservedArea
= CCByValInfo
.getNextStackOffset();
3575 MinReservedArea
= std::max(MinReservedArea
, LinkageSize
);
3577 // Set the size that is at least reserved in caller of this function. Tail
3578 // call optimized function's reserved stack space needs to be aligned so that
3579 // taking the difference between two stack areas will result in an aligned
3582 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
3583 FuncInfo
->setMinReservedArea(MinReservedArea
);
3585 SmallVector
<SDValue
, 8> MemOps
;
3587 // If the function takes variable number of arguments, make a frame index for
3588 // the start of the first vararg value... for expansion of llvm.va_start.
3590 static const MCPhysReg GPArgRegs
[] = {
3591 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
3592 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
3594 const unsigned NumGPArgRegs
= array_lengthof(GPArgRegs
);
3596 static const MCPhysReg FPArgRegs
[] = {
3597 PPC::F1
, PPC::F2
, PPC::F3
, PPC::F4
, PPC::F5
, PPC::F6
, PPC::F7
,
3600 unsigned NumFPArgRegs
= array_lengthof(FPArgRegs
);
3602 if (useSoftFloat() || hasSPE())
3605 FuncInfo
->setVarArgsNumGPR(CCInfo
.getFirstUnallocated(GPArgRegs
));
3606 FuncInfo
->setVarArgsNumFPR(CCInfo
.getFirstUnallocated(FPArgRegs
));
3608 // Make room for NumGPArgRegs and NumFPArgRegs.
3609 int Depth
= NumGPArgRegs
* PtrVT
.getSizeInBits()/8 +
3610 NumFPArgRegs
* MVT(MVT::f64
).getSizeInBits()/8;
3612 FuncInfo
->setVarArgsStackOffset(
3613 MFI
.CreateFixedObject(PtrVT
.getSizeInBits()/8,
3614 CCInfo
.getNextStackOffset(), true));
3616 FuncInfo
->setVarArgsFrameIndex(MFI
.CreateStackObject(Depth
, 8, false));
3617 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
3619 // The fixed integer arguments of a variadic function are stored to the
3620 // VarArgsFrameIndex on the stack so that they may be loaded by
3621 // dereferencing the result of va_next.
3622 for (unsigned GPRIndex
= 0; GPRIndex
!= NumGPArgRegs
; ++GPRIndex
) {
3623 // Get an existing live-in vreg, or add a new one.
3624 unsigned VReg
= MF
.getRegInfo().getLiveInVirtReg(GPArgRegs
[GPRIndex
]);
3626 VReg
= MF
.addLiveIn(GPArgRegs
[GPRIndex
], &PPC::GPRCRegClass
);
3628 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3630 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
3631 MemOps
.push_back(Store
);
3632 // Increment the address by four for the next argument to store
3633 SDValue PtrOff
= DAG
.getConstant(PtrVT
.getSizeInBits()/8, dl
, PtrVT
);
3634 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
3637 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
3639 // The double arguments are stored to the VarArgsFrameIndex
3641 for (unsigned FPRIndex
= 0; FPRIndex
!= NumFPArgRegs
; ++FPRIndex
) {
3642 // Get an existing live-in vreg, or add a new one.
3643 unsigned VReg
= MF
.getRegInfo().getLiveInVirtReg(FPArgRegs
[FPRIndex
]);
3645 VReg
= MF
.addLiveIn(FPArgRegs
[FPRIndex
], &PPC::F8RCRegClass
);
3647 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::f64
);
3649 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
3650 MemOps
.push_back(Store
);
3651 // Increment the address by eight for the next argument to store
3652 SDValue PtrOff
= DAG
.getConstant(MVT(MVT::f64
).getSizeInBits()/8, dl
,
3654 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
3658 if (!MemOps
.empty())
3659 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
3664 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3665 // value to MVT::i64 and then truncate to the correct register size.
3666 SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags
,
3667 EVT ObjectVT
, SelectionDAG
&DAG
,
3669 const SDLoc
&dl
) const {
3671 ArgVal
= DAG
.getNode(ISD::AssertSext
, dl
, MVT::i64
, ArgVal
,
3672 DAG
.getValueType(ObjectVT
));
3673 else if (Flags
.isZExt())
3674 ArgVal
= DAG
.getNode(ISD::AssertZext
, dl
, MVT::i64
, ArgVal
,
3675 DAG
.getValueType(ObjectVT
));
3677 return DAG
.getNode(ISD::TRUNCATE
, dl
, ObjectVT
, ArgVal
);
3680 SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
3681 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
3682 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
3683 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
3684 // TODO: add description of PPC stack frame format, or at least some docs.
3686 bool isELFv2ABI
= Subtarget
.isELFv2ABI();
3687 bool isLittleEndian
= Subtarget
.isLittleEndian();
3688 MachineFunction
&MF
= DAG
.getMachineFunction();
3689 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
3690 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
3692 assert(!(CallConv
== CallingConv::Fast
&& isVarArg
) &&
3693 "fastcc not supported on varargs functions");
3695 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
3696 // Potential tail calls could cause overwriting of argument stack slots.
3697 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
3698 (CallConv
== CallingConv::Fast
));
3699 unsigned PtrByteSize
= 8;
3700 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
3702 static const MCPhysReg GPR
[] = {
3703 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
3704 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
3706 static const MCPhysReg VR
[] = {
3707 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
3708 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
3711 const unsigned Num_GPR_Regs
= array_lengthof(GPR
);
3712 const unsigned Num_FPR_Regs
= useSoftFloat() ? 0 : 13;
3713 const unsigned Num_VR_Regs
= array_lengthof(VR
);
3714 const unsigned Num_QFPR_Regs
= Num_FPR_Regs
;
3716 // Do a first pass over the arguments to determine whether the ABI
3717 // guarantees that our caller has allocated the parameter save area
3718 // on its stack frame. In the ELFv1 ABI, this is always the case;
3719 // in the ELFv2 ABI, it is true if this is a vararg function or if
3720 // any parameter is located in a stack slot.
3722 bool HasParameterArea
= !isELFv2ABI
|| isVarArg
;
3723 unsigned ParamAreaSize
= Num_GPR_Regs
* PtrByteSize
;
3724 unsigned NumBytes
= LinkageSize
;
3725 unsigned AvailableFPRs
= Num_FPR_Regs
;
3726 unsigned AvailableVRs
= Num_VR_Regs
;
3727 for (unsigned i
= 0, e
= Ins
.size(); i
!= e
; ++i
) {
3728 if (Ins
[i
].Flags
.isNest())
3731 if (CalculateStackSlotUsed(Ins
[i
].VT
, Ins
[i
].ArgVT
, Ins
[i
].Flags
,
3732 PtrByteSize
, LinkageSize
, ParamAreaSize
,
3733 NumBytes
, AvailableFPRs
, AvailableVRs
,
3734 Subtarget
.hasQPX()))
3735 HasParameterArea
= true;
3738 // Add DAG nodes to load the arguments or copy them out of registers. On
3739 // entry to a function on PPC, the arguments start after the linkage area,
3740 // although the first ones are often in registers.
3742 unsigned ArgOffset
= LinkageSize
;
3743 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
3744 unsigned &QFPR_idx
= FPR_idx
;
3745 SmallVector
<SDValue
, 8> MemOps
;
3746 Function::const_arg_iterator FuncArg
= MF
.getFunction().arg_begin();
3747 unsigned CurArgIdx
= 0;
3748 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
; ++ArgNo
) {
3750 bool needsLoad
= false;
3751 EVT ObjectVT
= Ins
[ArgNo
].VT
;
3752 EVT OrigVT
= Ins
[ArgNo
].ArgVT
;
3753 unsigned ObjSize
= ObjectVT
.getStoreSize();
3754 unsigned ArgSize
= ObjSize
;
3755 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
3756 if (Ins
[ArgNo
].isOrigArg()) {
3757 std::advance(FuncArg
, Ins
[ArgNo
].getOrigArgIndex() - CurArgIdx
);
3758 CurArgIdx
= Ins
[ArgNo
].getOrigArgIndex();
3760 // We re-align the argument offset for each argument, except when using the
3761 // fast calling convention, when we need to make sure we do that only when
3762 // we'll actually use a stack slot.
3763 unsigned CurArgOffset
, Align
;
3764 auto ComputeArgOffset
= [&]() {
3765 /* Respect alignment of argument on the stack. */
3766 Align
= CalculateStackSlotAlignment(ObjectVT
, OrigVT
, Flags
, PtrByteSize
);
3767 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
3768 CurArgOffset
= ArgOffset
;
3771 if (CallConv
!= CallingConv::Fast
) {
3774 /* Compute GPR index associated with argument offset. */
3775 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
3776 GPR_idx
= std::min(GPR_idx
, Num_GPR_Regs
);
3779 // FIXME the codegen can be much improved in some cases.
3780 // We do not have to keep everything in memory.
3781 if (Flags
.isByVal()) {
3782 assert(Ins
[ArgNo
].isOrigArg() && "Byval arguments cannot be implicit");
3784 if (CallConv
== CallingConv::Fast
)
3787 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
3788 ObjSize
= Flags
.getByValSize();
3789 ArgSize
= ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3790 // Empty aggregate parameters do not take up registers. Examples:
3794 // etc. However, we have to provide a place-holder in InVals, so
3795 // pretend we have an 8-byte item at the current address for that
3798 int FI
= MFI
.CreateFixedObject(PtrByteSize
, ArgOffset
, true);
3799 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3800 InVals
.push_back(FIN
);
3804 // Create a stack object covering all stack doublewords occupied
3805 // by the argument. If the argument is (fully or partially) on
3806 // the stack, or if the argument is fully in registers but the
3807 // caller has allocated the parameter save anyway, we can refer
3808 // directly to the caller's stack frame. Otherwise, create a
3809 // local copy in our own frame.
3811 if (HasParameterArea
||
3812 ArgSize
+ ArgOffset
> LinkageSize
+ Num_GPR_Regs
* PtrByteSize
)
3813 FI
= MFI
.CreateFixedObject(ArgSize
, ArgOffset
, false, true);
3815 FI
= MFI
.CreateStackObject(ArgSize
, Align
, false);
3816 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
3818 // Handle aggregates smaller than 8 bytes.
3819 if (ObjSize
< PtrByteSize
) {
3820 // The value of the object is its address, which differs from the
3821 // address of the enclosing doubleword on big-endian systems.
3823 if (!isLittleEndian
) {
3824 SDValue ArgOff
= DAG
.getConstant(PtrByteSize
- ObjSize
, dl
, PtrVT
);
3825 Arg
= DAG
.getNode(ISD::ADD
, dl
, ArgOff
.getValueType(), Arg
, ArgOff
);
3827 InVals
.push_back(Arg
);
3829 if (GPR_idx
!= Num_GPR_Regs
) {
3830 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3831 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3832 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3835 if (ObjSize
==1 || ObjSize
==2 || ObjSize
==4) {
3836 EVT ObjType
= (ObjSize
== 1 ? MVT::i8
:
3837 (ObjSize
== 2 ? MVT::i16
: MVT::i32
));
3838 Store
= DAG
.getTruncStore(Val
.getValue(1), dl
, Val
, Arg
,
3839 MachinePointerInfo(&*FuncArg
), ObjType
);
3841 // For sizes that don't fit a truncating store (3, 5, 6, 7),
3842 // store the whole register as-is to the parameter save area
3844 Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
,
3845 MachinePointerInfo(&*FuncArg
));
3848 MemOps
.push_back(Store
);
3850 // Whether we copied from a register or not, advance the offset
3851 // into the parameter save area by a full doubleword.
3852 ArgOffset
+= PtrByteSize
;
3856 // The value of the object is its address, which is the address of
3857 // its first stack doubleword.
3858 InVals
.push_back(FIN
);
3860 // Store whatever pieces of the object are in registers to memory.
3861 for (unsigned j
= 0; j
< ArgSize
; j
+= PtrByteSize
) {
3862 if (GPR_idx
== Num_GPR_Regs
)
3865 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
3866 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3867 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
3870 SDValue Off
= DAG
.getConstant(j
, dl
, PtrVT
);
3871 Addr
= DAG
.getNode(ISD::ADD
, dl
, Off
.getValueType(), Addr
, Off
);
3873 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, Addr
,
3874 MachinePointerInfo(&*FuncArg
, j
));
3875 MemOps
.push_back(Store
);
3878 ArgOffset
+= ArgSize
;
3882 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
3883 default: llvm_unreachable("Unhandled argument type!");
3887 if (Flags
.isNest()) {
3888 // The 'nest' parameter, if any, is passed in R11.
3889 unsigned VReg
= MF
.addLiveIn(PPC::X11
, &PPC::G8RCRegClass
);
3890 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3892 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
3893 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
3898 // These can be scalar arguments or elements of an integer array type
3899 // passed directly. Clang may use those instead of "byval" aggregate
3900 // types to avoid forcing arguments to memory unnecessarily.
3901 if (GPR_idx
!= Num_GPR_Regs
) {
3902 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3903 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3904 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3906 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
3907 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
3908 // value to MVT::i64 and then truncate to the correct register size.
3909 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
3911 if (CallConv
== CallingConv::Fast
)
3915 ArgSize
= PtrByteSize
;
3917 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
3923 // These can be scalar arguments or elements of a float array type
3924 // passed directly. The latter are used to implement ELFv2 homogenous
3925 // float aggregates.
3926 if (FPR_idx
!= Num_FPR_Regs
) {
3929 if (ObjectVT
== MVT::f32
)
3930 VReg
= MF
.addLiveIn(FPR
[FPR_idx
],
3931 Subtarget
.hasP8Vector()
3932 ? &PPC::VSSRCRegClass
3933 : &PPC::F4RCRegClass
);
3935 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], Subtarget
.hasVSX()
3936 ? &PPC::VSFRCRegClass
3937 : &PPC::F8RCRegClass
);
3939 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
3941 } else if (GPR_idx
!= Num_GPR_Regs
&& CallConv
!= CallingConv::Fast
) {
3942 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
3943 // once we support fp <-> gpr moves.
3945 // This can only ever happen in the presence of f32 array types,
3946 // since otherwise we never run out of FPRs before running out
3948 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
++], &PPC::G8RCRegClass
);
3949 FuncInfo
->addLiveInAttr(VReg
, Flags
);
3950 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
3952 if (ObjectVT
== MVT::f32
) {
3953 if ((ArgOffset
% PtrByteSize
) == (isLittleEndian
? 4 : 0))
3954 ArgVal
= DAG
.getNode(ISD::SRL
, dl
, MVT::i64
, ArgVal
,
3955 DAG
.getConstant(32, dl
, MVT::i32
));
3956 ArgVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, ArgVal
);
3959 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, ObjectVT
, ArgVal
);
3961 if (CallConv
== CallingConv::Fast
)
3967 // When passing an array of floats, the array occupies consecutive
3968 // space in the argument area; only round up to the next doubleword
3969 // at the end of the array. Otherwise, each float takes 8 bytes.
3970 if (CallConv
!= CallingConv::Fast
|| needsLoad
) {
3971 ArgSize
= Flags
.isInConsecutiveRegs() ? ObjSize
: PtrByteSize
;
3972 ArgOffset
+= ArgSize
;
3973 if (Flags
.isInConsecutiveRegsLast())
3974 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
3985 if (!Subtarget
.hasQPX()) {
3986 // These can be scalar arguments or elements of a vector array type
3987 // passed directly. The latter are used to implement ELFv2 homogenous
3988 // vector aggregates.
3989 if (VR_idx
!= Num_VR_Regs
) {
3990 unsigned VReg
= MF
.addLiveIn(VR
[VR_idx
], &PPC::VRRCRegClass
);
3991 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
3994 if (CallConv
== CallingConv::Fast
)
3998 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
4003 assert(ObjectVT
.getSimpleVT().SimpleTy
== MVT::v4f32
&&
4004 "Invalid QPX parameter type");
4009 // QPX vectors are treated like their scalar floating-point subregisters
4010 // (except that they're larger).
4011 unsigned Sz
= ObjectVT
.getSimpleVT().SimpleTy
== MVT::v4f32
? 16 : 32;
4012 if (QFPR_idx
!= Num_QFPR_Regs
) {
4013 const TargetRegisterClass
*RC
;
4014 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
4015 case MVT::v4f64
: RC
= &PPC::QFRCRegClass
; break;
4016 case MVT::v4f32
: RC
= &PPC::QSRCRegClass
; break;
4017 default: RC
= &PPC::QBRCRegClass
; break;
4020 unsigned VReg
= MF
.addLiveIn(QFPR
[QFPR_idx
], RC
);
4021 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4024 if (CallConv
== CallingConv::Fast
)
4028 if (CallConv
!= CallingConv::Fast
|| needsLoad
)
4033 // We need to load the argument to a virtual register if we determined
4034 // above that we ran out of physical registers of the appropriate type.
4036 if (ObjSize
< ArgSize
&& !isLittleEndian
)
4037 CurArgOffset
+= ArgSize
- ObjSize
;
4038 int FI
= MFI
.CreateFixedObject(ObjSize
, CurArgOffset
, isImmutable
);
4039 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4040 ArgVal
= DAG
.getLoad(ObjectVT
, dl
, Chain
, FIN
, MachinePointerInfo());
4043 InVals
.push_back(ArgVal
);
4046 // Area that is at least reserved in the caller of this function.
4047 unsigned MinReservedArea
;
4048 if (HasParameterArea
)
4049 MinReservedArea
= std::max(ArgOffset
, LinkageSize
+ 8 * PtrByteSize
);
4051 MinReservedArea
= LinkageSize
;
4053 // Set the size that is at least reserved in caller of this function. Tail
4054 // call optimized functions' reserved stack space needs to be aligned so that
4055 // taking the difference between two stack areas will result in an aligned
4058 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
4059 FuncInfo
->setMinReservedArea(MinReservedArea
);
4061 // If the function takes variable number of arguments, make a frame index for
4062 // the start of the first vararg value... for expansion of llvm.va_start.
4064 int Depth
= ArgOffset
;
4066 FuncInfo
->setVarArgsFrameIndex(
4067 MFI
.CreateFixedObject(PtrByteSize
, Depth
, true));
4068 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
4070 // If this function is vararg, store any remaining integer argument regs
4071 // to their spots on the stack so that they may be loaded by dereferencing
4072 // the result of va_next.
4073 for (GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
4074 GPR_idx
< Num_GPR_Regs
; ++GPR_idx
) {
4075 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4076 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4078 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
4079 MemOps
.push_back(Store
);
4080 // Increment the address by four for the next argument to store
4081 SDValue PtrOff
= DAG
.getConstant(PtrByteSize
, dl
, PtrVT
);
4082 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
4086 if (!MemOps
.empty())
4087 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
4092 SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
4093 SDValue Chain
, CallingConv::ID CallConv
, bool isVarArg
,
4094 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
4095 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
4096 // TODO: add description of PPC stack frame format, or at least some docs.
4098 MachineFunction
&MF
= DAG
.getMachineFunction();
4099 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
4100 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
4102 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
4103 bool isPPC64
= PtrVT
== MVT::i64
;
4104 // Potential tail calls could cause overwriting of argument stack slots.
4105 bool isImmutable
= !(getTargetMachine().Options
.GuaranteedTailCallOpt
&&
4106 (CallConv
== CallingConv::Fast
));
4107 unsigned PtrByteSize
= isPPC64
? 8 : 4;
4108 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
4109 unsigned ArgOffset
= LinkageSize
;
4110 // Area that is at least reserved in caller of this function.
4111 unsigned MinReservedArea
= ArgOffset
;
4113 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
4114 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
4115 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
4117 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
4118 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
4119 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
4121 static const MCPhysReg VR
[] = {
4122 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
4123 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
4126 const unsigned Num_GPR_Regs
= array_lengthof(GPR_32
);
4127 const unsigned Num_FPR_Regs
= useSoftFloat() ? 0 : 13;
4128 const unsigned Num_VR_Regs
= array_lengthof( VR
);
4130 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
4132 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
4134 // In 32-bit non-varargs functions, the stack space for vectors is after the
4135 // stack space for non-vectors. We do not use this space unless we have
4136 // too many vectors to fit in registers, something that only occurs in
4137 // constructed examples:), but we have to walk the arglist to figure
4138 // that out...for the pathological case, compute VecArgOffset as the
4139 // start of the vector parameter area. Computing VecArgOffset is the
4140 // entire point of the following loop.
4141 unsigned VecArgOffset
= ArgOffset
;
4142 if (!isVarArg
&& !isPPC64
) {
4143 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
;
4145 EVT ObjectVT
= Ins
[ArgNo
].VT
;
4146 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
4148 if (Flags
.isByVal()) {
4149 // ObjSize is the true size, ArgSize rounded up to multiple of regs.
4150 unsigned ObjSize
= Flags
.getByValSize();
4152 ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
4153 VecArgOffset
+= ArgSize
;
4157 switch(ObjectVT
.getSimpleVT().SimpleTy
) {
4158 default: llvm_unreachable("Unhandled argument type!");
4164 case MVT::i64
: // PPC64
4166 // FIXME: We are guaranteed to be !isPPC64 at this point.
4167 // Does MVT::i64 apply?
4174 // Nothing to do, we're only looking at Nonvector args here.
4179 // We've found where the vector parameter area in memory is. Skip the
4180 // first 12 parameters; these don't use that memory.
4181 VecArgOffset
= ((VecArgOffset
+15)/16)*16;
4182 VecArgOffset
+= 12*16;
4184 // Add DAG nodes to load the arguments or copy them out of registers. On
4185 // entry to a function on PPC, the arguments start after the linkage area,
4186 // although the first ones are often in registers.
4188 SmallVector
<SDValue
, 8> MemOps
;
4189 unsigned nAltivecParamsAtEnd
= 0;
4190 Function::const_arg_iterator FuncArg
= MF
.getFunction().arg_begin();
4191 unsigned CurArgIdx
= 0;
4192 for (unsigned ArgNo
= 0, e
= Ins
.size(); ArgNo
!= e
; ++ArgNo
) {
4194 bool needsLoad
= false;
4195 EVT ObjectVT
= Ins
[ArgNo
].VT
;
4196 unsigned ObjSize
= ObjectVT
.getSizeInBits()/8;
4197 unsigned ArgSize
= ObjSize
;
4198 ISD::ArgFlagsTy Flags
= Ins
[ArgNo
].Flags
;
4199 if (Ins
[ArgNo
].isOrigArg()) {
4200 std::advance(FuncArg
, Ins
[ArgNo
].getOrigArgIndex() - CurArgIdx
);
4201 CurArgIdx
= Ins
[ArgNo
].getOrigArgIndex();
4203 unsigned CurArgOffset
= ArgOffset
;
4205 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
4206 if (ObjectVT
==MVT::v4f32
|| ObjectVT
==MVT::v4i32
||
4207 ObjectVT
==MVT::v8i16
|| ObjectVT
==MVT::v16i8
) {
4208 if (isVarArg
|| isPPC64
) {
4209 MinReservedArea
= ((MinReservedArea
+15)/16)*16;
4210 MinReservedArea
+= CalculateStackSlotSize(ObjectVT
,
4213 } else nAltivecParamsAtEnd
++;
4215 // Calculate min reserved area.
4216 MinReservedArea
+= CalculateStackSlotSize(Ins
[ArgNo
].VT
,
4220 // FIXME the codegen can be much improved in some cases.
4221 // We do not have to keep everything in memory.
4222 if (Flags
.isByVal()) {
4223 assert(Ins
[ArgNo
].isOrigArg() && "Byval arguments cannot be implicit");
4225 // ObjSize is the true size, ArgSize rounded up to multiple of registers.
4226 ObjSize
= Flags
.getByValSize();
4227 ArgSize
= ((ObjSize
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
4228 // Objects of size 1 and 2 are right justified, everything else is
4229 // left justified. This means the memory address is adjusted forwards.
4230 if (ObjSize
==1 || ObjSize
==2) {
4231 CurArgOffset
= CurArgOffset
+ (4 - ObjSize
);
4233 // The value of the object is its address.
4234 int FI
= MFI
.CreateFixedObject(ObjSize
, CurArgOffset
, false, true);
4235 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4236 InVals
.push_back(FIN
);
4237 if (ObjSize
==1 || ObjSize
==2) {
4238 if (GPR_idx
!= Num_GPR_Regs
) {
4241 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4243 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4244 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4245 EVT ObjType
= ObjSize
== 1 ? MVT::i8
: MVT::i16
;
4247 DAG
.getTruncStore(Val
.getValue(1), dl
, Val
, FIN
,
4248 MachinePointerInfo(&*FuncArg
), ObjType
);
4249 MemOps
.push_back(Store
);
4253 ArgOffset
+= PtrByteSize
;
4257 for (unsigned j
= 0; j
< ArgSize
; j
+= PtrByteSize
) {
4258 // Store whatever pieces of the object are in registers
4259 // to memory. ArgOffset will be the address of the beginning
4261 if (GPR_idx
!= Num_GPR_Regs
) {
4264 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4266 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4267 int FI
= MFI
.CreateFixedObject(PtrByteSize
, ArgOffset
, true);
4268 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4269 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4270 SDValue Store
= DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
,
4271 MachinePointerInfo(&*FuncArg
, j
));
4272 MemOps
.push_back(Store
);
4274 ArgOffset
+= PtrByteSize
;
4276 ArgOffset
+= ArgSize
- (ArgOffset
-CurArgOffset
);
4283 switch (ObjectVT
.getSimpleVT().SimpleTy
) {
4284 default: llvm_unreachable("Unhandled argument type!");
4288 if (GPR_idx
!= Num_GPR_Regs
) {
4289 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4290 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i32
);
4292 if (ObjectVT
== MVT::i1
)
4293 ArgVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, ArgVal
);
4298 ArgSize
= PtrByteSize
;
4300 // All int arguments reserve stack space in the Darwin ABI.
4301 ArgOffset
+= PtrByteSize
;
4305 case MVT::i64
: // PPC64
4306 if (GPR_idx
!= Num_GPR_Regs
) {
4307 unsigned VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4308 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, MVT::i64
);
4310 if (ObjectVT
== MVT::i32
|| ObjectVT
== MVT::i1
)
4311 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4312 // value to MVT::i64 and then truncate to the correct register size.
4313 ArgVal
= extendArgForPPC64(Flags
, ObjectVT
, DAG
, ArgVal
, dl
);
4318 ArgSize
= PtrByteSize
;
4320 // All int arguments reserve stack space in the Darwin ABI.
4326 // Every 4 bytes of argument space consumes one of the GPRs available for
4327 // argument passing.
4328 if (GPR_idx
!= Num_GPR_Regs
) {
4330 if (ObjSize
== 8 && GPR_idx
!= Num_GPR_Regs
&& !isPPC64
)
4333 if (FPR_idx
!= Num_FPR_Regs
) {
4336 if (ObjectVT
== MVT::f32
)
4337 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], &PPC::F4RCRegClass
);
4339 VReg
= MF
.addLiveIn(FPR
[FPR_idx
], &PPC::F8RCRegClass
);
4341 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4347 // All FP arguments reserve stack space in the Darwin ABI.
4348 ArgOffset
+= isPPC64
? 8 : ObjSize
;
4354 // Note that vector arguments in registers don't reserve stack space,
4355 // except in varargs functions.
4356 if (VR_idx
!= Num_VR_Regs
) {
4357 unsigned VReg
= MF
.addLiveIn(VR
[VR_idx
], &PPC::VRRCRegClass
);
4358 ArgVal
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, ObjectVT
);
4360 while ((ArgOffset
% 16) != 0) {
4361 ArgOffset
+= PtrByteSize
;
4362 if (GPR_idx
!= Num_GPR_Regs
)
4366 GPR_idx
= std::min(GPR_idx
+4, Num_GPR_Regs
); // FIXME correct for ppc64?
4370 if (!isVarArg
&& !isPPC64
) {
4371 // Vectors go after all the nonvectors.
4372 CurArgOffset
= VecArgOffset
;
4375 // Vectors are aligned.
4376 ArgOffset
= ((ArgOffset
+15)/16)*16;
4377 CurArgOffset
= ArgOffset
;
4385 // We need to load the argument to a virtual register if we determined above
4386 // that we ran out of physical registers of the appropriate type.
4388 int FI
= MFI
.CreateFixedObject(ObjSize
,
4389 CurArgOffset
+ (ArgSize
- ObjSize
),
4391 SDValue FIN
= DAG
.getFrameIndex(FI
, PtrVT
);
4392 ArgVal
= DAG
.getLoad(ObjectVT
, dl
, Chain
, FIN
, MachinePointerInfo());
4395 InVals
.push_back(ArgVal
);
4398 // Allow for Altivec parameters at the end, if needed.
4399 if (nAltivecParamsAtEnd
) {
4400 MinReservedArea
= ((MinReservedArea
+15)/16)*16;
4401 MinReservedArea
+= 16*nAltivecParamsAtEnd
;
4404 // Area that is at least reserved in the caller of this function.
4405 MinReservedArea
= std::max(MinReservedArea
, LinkageSize
+ 8 * PtrByteSize
);
4407 // Set the size that is at least reserved in caller of this function. Tail
4408 // call optimized functions' reserved stack space needs to be aligned so that
4409 // taking the difference between two stack areas will result in an aligned
4412 EnsureStackAlignment(Subtarget
.getFrameLowering(), MinReservedArea
);
4413 FuncInfo
->setMinReservedArea(MinReservedArea
);
4415 // If the function takes variable number of arguments, make a frame index for
4416 // the start of the first vararg value... for expansion of llvm.va_start.
4418 int Depth
= ArgOffset
;
4420 FuncInfo
->setVarArgsFrameIndex(
4421 MFI
.CreateFixedObject(PtrVT
.getSizeInBits()/8,
4423 SDValue FIN
= DAG
.getFrameIndex(FuncInfo
->getVarArgsFrameIndex(), PtrVT
);
4425 // If this function is vararg, store any remaining integer argument regs
4426 // to their spots on the stack so that they may be loaded by dereferencing
4427 // the result of va_next.
4428 for (; GPR_idx
!= Num_GPR_Regs
; ++GPR_idx
) {
4432 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::G8RCRegClass
);
4434 VReg
= MF
.addLiveIn(GPR
[GPR_idx
], &PPC::GPRCRegClass
);
4436 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, VReg
, PtrVT
);
4438 DAG
.getStore(Val
.getValue(1), dl
, Val
, FIN
, MachinePointerInfo());
4439 MemOps
.push_back(Store
);
4440 // Increment the address by four for the next argument to store
4441 SDValue PtrOff
= DAG
.getConstant(PtrVT
.getSizeInBits()/8, dl
, PtrVT
);
4442 FIN
= DAG
.getNode(ISD::ADD
, dl
, PtrOff
.getValueType(), FIN
, PtrOff
);
4446 if (!MemOps
.empty())
4447 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOps
);
4452 /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
4453 /// adjusted to accommodate the arguments for the tailcall.
4454 static int CalculateTailCallSPDiff(SelectionDAG
& DAG
, bool isTailCall
,
4455 unsigned ParamSize
) {
4457 if (!isTailCall
) return 0;
4459 PPCFunctionInfo
*FI
= DAG
.getMachineFunction().getInfo
<PPCFunctionInfo
>();
4460 unsigned CallerMinReservedArea
= FI
->getMinReservedArea();
4461 int SPDiff
= (int)CallerMinReservedArea
- (int)ParamSize
;
4462 // Remember only if the new adjustment is bigger.
4463 if (SPDiff
< FI
->getTailCallSPDelta())
4464 FI
->setTailCallSPDelta(SPDiff
);
4469 static bool isFunctionGlobalAddress(SDValue Callee
);
4472 callsShareTOCBase(const Function
*Caller
, SDValue Callee
,
4473 const TargetMachine
&TM
) {
4474 // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4475 // don't have enough information to determine if the caller and calle share
4476 // the same TOC base, so we have to pessimistically assume they don't for
4478 GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
);
4482 const GlobalValue
*GV
= G
->getGlobal();
4483 // The medium and large code models are expected to provide a sufficiently
4484 // large TOC to provide all data addressing needs of a module with a
4485 // single TOC. Since each module will be addressed with a single TOC then we
4486 // only need to check that caller and callee don't cross dso boundaries.
4487 if (CodeModel::Medium
== TM
.getCodeModel() ||
4488 CodeModel::Large
== TM
.getCodeModel())
4489 return TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
);
4491 // Otherwise we need to ensure callee and caller are in the same section,
4492 // since the linker may allocate multiple TOCs, and we don't know which
4493 // sections will belong to the same TOC base.
4495 if (!GV
->isStrongDefinitionForLinker())
4498 // Any explicitly-specified sections and section prefixes must also match.
4499 // Also, if we're using -ffunction-sections, then each function is always in
4500 // a different section (the same is true for COMDAT functions).
4501 if (TM
.getFunctionSections() || GV
->hasComdat() || Caller
->hasComdat() ||
4502 GV
->getSection() != Caller
->getSection())
4504 if (const auto *F
= dyn_cast
<Function
>(GV
)) {
4505 if (F
->getSectionPrefix() != Caller
->getSectionPrefix())
4509 // If the callee might be interposed, then we can't assume the ultimate call
4510 // target will be in the same section. Even in cases where we can assume that
4511 // interposition won't happen, in any case where the linker might insert a
4512 // stub to allow for interposition, we must generate code as though
4513 // interposition might occur. To understand why this matters, consider a
4514 // situation where: a -> b -> c where the arrows indicate calls. b and c are
4515 // in the same section, but a is in a different module (i.e. has a different
4516 // TOC base pointer). If the linker allows for interposition between b and c,
4517 // then it will generate a stub for the call edge between b and c which will
4518 // save the TOC pointer into the designated stack slot allocated by b. If we
4519 // return true here, and therefore allow a tail call between b and c, that
4520 // stack slot won't exist and the b -> c stub will end up saving b'c TOC base
4521 // pointer into the stack slot allocated by a (where the a -> b stub saved
4522 // a's TOC base pointer). If we're not considering a tail call, but rather,
4523 // whether a nop is needed after the call instruction in b, because the linker
4524 // will insert a stub, it might complain about a missing nop if we omit it
4525 // (although many don't complain in this case).
4526 if (!TM
.shouldAssumeDSOLocal(*Caller
->getParent(), GV
))
4533 needStackSlotPassParameters(const PPCSubtarget
&Subtarget
,
4534 const SmallVectorImpl
<ISD::OutputArg
> &Outs
) {
4535 assert(Subtarget
.is64BitELFABI());
4537 const unsigned PtrByteSize
= 8;
4538 const unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
4540 static const MCPhysReg GPR
[] = {
4541 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
4542 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
4544 static const MCPhysReg VR
[] = {
4545 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
4546 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
4549 const unsigned NumGPRs
= array_lengthof(GPR
);
4550 const unsigned NumFPRs
= 13;
4551 const unsigned NumVRs
= array_lengthof(VR
);
4552 const unsigned ParamAreaSize
= NumGPRs
* PtrByteSize
;
4554 unsigned NumBytes
= LinkageSize
;
4555 unsigned AvailableFPRs
= NumFPRs
;
4556 unsigned AvailableVRs
= NumVRs
;
4558 for (const ISD::OutputArg
& Param
: Outs
) {
4559 if (Param
.Flags
.isNest()) continue;
4561 if (CalculateStackSlotUsed(Param
.VT
, Param
.ArgVT
, Param
.Flags
,
4562 PtrByteSize
, LinkageSize
, ParamAreaSize
,
4563 NumBytes
, AvailableFPRs
, AvailableVRs
,
4564 Subtarget
.hasQPX()))
4571 hasSameArgumentList(const Function
*CallerFn
, ImmutableCallSite CS
) {
4572 if (CS
.arg_size() != CallerFn
->arg_size())
4575 ImmutableCallSite::arg_iterator CalleeArgIter
= CS
.arg_begin();
4576 ImmutableCallSite::arg_iterator CalleeArgEnd
= CS
.arg_end();
4577 Function::const_arg_iterator CallerArgIter
= CallerFn
->arg_begin();
4579 for (; CalleeArgIter
!= CalleeArgEnd
; ++CalleeArgIter
, ++CallerArgIter
) {
4580 const Value
* CalleeArg
= *CalleeArgIter
;
4581 const Value
* CallerArg
= &(*CallerArgIter
);
4582 if (CalleeArg
== CallerArg
)
4585 // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4586 // tail call @callee([4 x i64] undef, [4 x i64] %b)
4588 // 1st argument of callee is undef and has the same type as caller.
4589 if (CalleeArg
->getType() == CallerArg
->getType() &&
4590 isa
<UndefValue
>(CalleeArg
))
4599 // Returns true if TCO is possible between the callers and callees
4600 // calling conventions.
4602 areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC
,
4603 CallingConv::ID CalleeCC
) {
4604 // Tail calls are possible with fastcc and ccc.
4605 auto isTailCallableCC
= [] (CallingConv::ID CC
){
4606 return CC
== CallingConv::C
|| CC
== CallingConv::Fast
;
4608 if (!isTailCallableCC(CallerCC
) || !isTailCallableCC(CalleeCC
))
4611 // We can safely tail call both fastcc and ccc callees from a c calling
4612 // convention caller. If the caller is fastcc, we may have less stack space
4613 // than a non-fastcc caller with the same signature so disable tail-calls in
4615 return CallerCC
== CallingConv::C
|| CallerCC
== CalleeCC
;
4619 PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4621 CallingConv::ID CalleeCC
,
4622 ImmutableCallSite CS
,
4624 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
4625 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
4626 SelectionDAG
& DAG
) const {
4627 bool TailCallOpt
= getTargetMachine().Options
.GuaranteedTailCallOpt
;
4629 if (DisableSCO
&& !TailCallOpt
) return false;
4631 // Variadic argument functions are not supported.
4632 if (isVarArg
) return false;
4634 auto &Caller
= DAG
.getMachineFunction().getFunction();
4635 // Check that the calling conventions are compatible for tco.
4636 if (!areCallingConvEligibleForTCO_64SVR4(Caller
.getCallingConv(), CalleeCC
))
4639 // Caller contains any byval parameter is not supported.
4640 if (any_of(Ins
, [](const ISD::InputArg
&IA
) { return IA
.Flags
.isByVal(); }))
4643 // Callee contains any byval parameter is not supported, too.
4644 // Note: This is a quick work around, because in some cases, e.g.
4645 // caller's stack size > callee's stack size, we are still able to apply
4646 // sibling call optimization. For example, gcc is able to do SCO for caller1
4647 // in the following example, but not for caller2.
4652 // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4656 // void caller1(struct test a, struct test c, struct test *b) {
4657 // callee(gTest, b); }
4658 // void caller2(struct test *b) { callee(gTest, b); }
4659 if (any_of(Outs
, [](const ISD::OutputArg
& OA
) { return OA
.Flags
.isByVal(); }))
4662 // If callee and caller use different calling conventions, we cannot pass
4663 // parameters on stack since offsets for the parameter area may be different.
4664 if (Caller
.getCallingConv() != CalleeCC
&&
4665 needStackSlotPassParameters(Subtarget
, Outs
))
4668 // No TCO/SCO on indirect call because Caller have to restore its TOC
4669 if (!isFunctionGlobalAddress(Callee
) &&
4670 !isa
<ExternalSymbolSDNode
>(Callee
))
4673 // If the caller and callee potentially have different TOC bases then we
4674 // cannot tail call since we need to restore the TOC pointer after the call.
4675 // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4676 if (!callsShareTOCBase(&Caller
, Callee
, getTargetMachine()))
4679 // TCO allows altering callee ABI, so we don't have to check further.
4680 if (CalleeCC
== CallingConv::Fast
&& TailCallOpt
)
4683 if (DisableSCO
) return false;
4685 // If callee use the same argument list that caller is using, then we can
4686 // apply SCO on this case. If it is not, then we need to check if callee needs
4687 // stack for passing arguments.
4688 if (!hasSameArgumentList(&Caller
, CS
) &&
4689 needStackSlotPassParameters(Subtarget
, Outs
)) {
4696 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
4697 /// for tail call optimization. Targets which want to do tail call
4698 /// optimization should implement this function.
4700 PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee
,
4701 CallingConv::ID CalleeCC
,
4703 const SmallVectorImpl
<ISD::InputArg
> &Ins
,
4704 SelectionDAG
& DAG
) const {
4705 if (!getTargetMachine().Options
.GuaranteedTailCallOpt
)
4708 // Variable argument functions are not supported.
4712 MachineFunction
&MF
= DAG
.getMachineFunction();
4713 CallingConv::ID CallerCC
= MF
.getFunction().getCallingConv();
4714 if (CalleeCC
== CallingConv::Fast
&& CallerCC
== CalleeCC
) {
4715 // Functions containing by val parameters are not supported.
4716 for (unsigned i
= 0; i
!= Ins
.size(); i
++) {
4717 ISD::ArgFlagsTy Flags
= Ins
[i
].Flags
;
4718 if (Flags
.isByVal()) return false;
4721 // Non-PIC/GOT tail calls are supported.
4722 if (getTargetMachine().getRelocationModel() != Reloc::PIC_
)
4725 // At the moment we can only do local tail calls (in same module, hidden
4726 // or protected) if we are generating PIC.
4727 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
))
4728 return G
->getGlobal()->hasHiddenVisibility()
4729 || G
->getGlobal()->hasProtectedVisibility();
4735 /// isCallCompatibleAddress - Return the immediate to use if the specified
4736 /// 32-bit value is representable in the immediate field of a BxA instruction.
4737 static SDNode
*isBLACompatibleAddress(SDValue Op
, SelectionDAG
&DAG
) {
4738 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
);
4739 if (!C
) return nullptr;
4741 int Addr
= C
->getZExtValue();
4742 if ((Addr
& 3) != 0 || // Low 2 bits are implicitly zero.
4743 SignExtend32
<26>(Addr
) != Addr
)
4744 return nullptr; // Top 6 bits have to be sext of immediate.
4748 (int)C
->getZExtValue() >> 2, SDLoc(Op
),
4749 DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout()))
4755 struct TailCallArgumentInfo
{
4760 TailCallArgumentInfo() = default;
4763 } // end anonymous namespace
4765 /// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
4766 static void StoreTailCallArgumentsToStackSlot(
4767 SelectionDAG
&DAG
, SDValue Chain
,
4768 const SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArgs
,
4769 SmallVectorImpl
<SDValue
> &MemOpChains
, const SDLoc
&dl
) {
4770 for (unsigned i
= 0, e
= TailCallArgs
.size(); i
!= e
; ++i
) {
4771 SDValue Arg
= TailCallArgs
[i
].Arg
;
4772 SDValue FIN
= TailCallArgs
[i
].FrameIdxOp
;
4773 int FI
= TailCallArgs
[i
].FrameIdx
;
4774 // Store relative to framepointer.
4775 MemOpChains
.push_back(DAG
.getStore(
4776 Chain
, dl
, Arg
, FIN
,
4777 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FI
)));
4781 /// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
4782 /// the appropriate stack slot for the tail call optimized function call.
4783 static SDValue
EmitTailCallStoreFPAndRetAddr(SelectionDAG
&DAG
, SDValue Chain
,
4784 SDValue OldRetAddr
, SDValue OldFP
,
4785 int SPDiff
, const SDLoc
&dl
) {
4787 // Calculate the new stack slot for the return address.
4788 MachineFunction
&MF
= DAG
.getMachineFunction();
4789 const PPCSubtarget
&Subtarget
= MF
.getSubtarget
<PPCSubtarget
>();
4790 const PPCFrameLowering
*FL
= Subtarget
.getFrameLowering();
4791 bool isPPC64
= Subtarget
.isPPC64();
4792 int SlotSize
= isPPC64
? 8 : 4;
4793 int NewRetAddrLoc
= SPDiff
+ FL
->getReturnSaveOffset();
4794 int NewRetAddr
= MF
.getFrameInfo().CreateFixedObject(SlotSize
,
4795 NewRetAddrLoc
, true);
4796 EVT VT
= isPPC64
? MVT::i64
: MVT::i32
;
4797 SDValue NewRetAddrFrIdx
= DAG
.getFrameIndex(NewRetAddr
, VT
);
4798 Chain
= DAG
.getStore(Chain
, dl
, OldRetAddr
, NewRetAddrFrIdx
,
4799 MachinePointerInfo::getFixedStack(MF
, NewRetAddr
));
4801 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
4802 // slot as the FP is never overwritten.
4803 if (Subtarget
.isDarwinABI()) {
4804 int NewFPLoc
= SPDiff
+ FL
->getFramePointerSaveOffset();
4805 int NewFPIdx
= MF
.getFrameInfo().CreateFixedObject(SlotSize
, NewFPLoc
,
4807 SDValue NewFramePtrIdx
= DAG
.getFrameIndex(NewFPIdx
, VT
);
4808 Chain
= DAG
.getStore(Chain
, dl
, OldFP
, NewFramePtrIdx
,
4809 MachinePointerInfo::getFixedStack(
4810 DAG
.getMachineFunction(), NewFPIdx
));
4816 /// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
4817 /// the position of the argument.
4819 CalculateTailCallArgDest(SelectionDAG
&DAG
, MachineFunction
&MF
, bool isPPC64
,
4820 SDValue Arg
, int SPDiff
, unsigned ArgOffset
,
4821 SmallVectorImpl
<TailCallArgumentInfo
>& TailCallArguments
) {
4822 int Offset
= ArgOffset
+ SPDiff
;
4823 uint32_t OpSize
= (Arg
.getValueSizeInBits() + 7) / 8;
4824 int FI
= MF
.getFrameInfo().CreateFixedObject(OpSize
, Offset
, true);
4825 EVT VT
= isPPC64
? MVT::i64
: MVT::i32
;
4826 SDValue FIN
= DAG
.getFrameIndex(FI
, VT
);
4827 TailCallArgumentInfo Info
;
4829 Info
.FrameIdxOp
= FIN
;
4831 TailCallArguments
.push_back(Info
);
4834 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
4835 /// stack slot. Returns the chain as result and the loaded frame pointers in
4836 /// LROpOut/FPOpout. Used when tail calling.
4837 SDValue
PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
4838 SelectionDAG
&DAG
, int SPDiff
, SDValue Chain
, SDValue
&LROpOut
,
4839 SDValue
&FPOpOut
, const SDLoc
&dl
) const {
4841 // Load the LR and FP stack slot for later adjusting.
4842 EVT VT
= Subtarget
.isPPC64() ? MVT::i64
: MVT::i32
;
4843 LROpOut
= getReturnAddrFrameIndex(DAG
);
4844 LROpOut
= DAG
.getLoad(VT
, dl
, Chain
, LROpOut
, MachinePointerInfo());
4845 Chain
= SDValue(LROpOut
.getNode(), 1);
4847 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
4848 // slot as the FP is never overwritten.
4849 if (Subtarget
.isDarwinABI()) {
4850 FPOpOut
= getFramePointerFrameIndex(DAG
);
4851 FPOpOut
= DAG
.getLoad(VT
, dl
, Chain
, FPOpOut
, MachinePointerInfo());
4852 Chain
= SDValue(FPOpOut
.getNode(), 1);
4858 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
4859 /// by "Src" to address "Dst" of size "Size". Alignment information is
4860 /// specified by the specific parameter attribute. The copy will be passed as
4861 /// a byval function parameter.
4862 /// Sometimes what we are copying is the end of a larger object, the part that
4863 /// does not fit in registers.
4864 static SDValue
CreateCopyOfByValArgument(SDValue Src
, SDValue Dst
,
4865 SDValue Chain
, ISD::ArgFlagsTy Flags
,
4866 SelectionDAG
&DAG
, const SDLoc
&dl
) {
4867 SDValue SizeNode
= DAG
.getConstant(Flags
.getByValSize(), dl
, MVT::i32
);
4868 return DAG
.getMemcpy(Chain
, dl
, Dst
, Src
, SizeNode
, Flags
.getByValAlign(),
4869 false, false, false, MachinePointerInfo(),
4870 MachinePointerInfo());
4873 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
4875 static void LowerMemOpCallTo(
4876 SelectionDAG
&DAG
, MachineFunction
&MF
, SDValue Chain
, SDValue Arg
,
4877 SDValue PtrOff
, int SPDiff
, unsigned ArgOffset
, bool isPPC64
,
4878 bool isTailCall
, bool isVector
, SmallVectorImpl
<SDValue
> &MemOpChains
,
4879 SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArguments
, const SDLoc
&dl
) {
4880 EVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout());
4885 StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
4887 StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
4888 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
,
4889 DAG
.getConstant(ArgOffset
, dl
, PtrVT
));
4891 MemOpChains
.push_back(
4892 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo()));
4893 // Calculate and remember argument location.
4894 } else CalculateTailCallArgDest(DAG
, MF
, isPPC64
, Arg
, SPDiff
, ArgOffset
,
4899 PrepareTailCall(SelectionDAG
&DAG
, SDValue
&InFlag
, SDValue
&Chain
,
4900 const SDLoc
&dl
, int SPDiff
, unsigned NumBytes
, SDValue LROp
,
4902 SmallVectorImpl
<TailCallArgumentInfo
> &TailCallArguments
) {
4903 // Emit a sequence of copyto/copyfrom virtual registers for arguments that
4904 // might overwrite each other in case of tail call optimization.
4905 SmallVector
<SDValue
, 8> MemOpChains2
;
4906 // Do not flag preceding copytoreg stuff together with the following stuff.
4908 StoreTailCallArgumentsToStackSlot(DAG
, Chain
, TailCallArguments
,
4910 if (!MemOpChains2
.empty())
4911 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains2
);
4913 // Store the return address to the appropriate stack slot.
4914 Chain
= EmitTailCallStoreFPAndRetAddr(DAG
, Chain
, LROp
, FPOp
, SPDiff
, dl
);
4916 // Emit callseq_end just before tailcall node.
4917 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, dl
, true),
4918 DAG
.getIntPtrConstant(0, dl
, true), InFlag
, dl
);
4919 InFlag
= Chain
.getValue(1);
4922 // Is this global address that of a function that can be called by name? (as
4923 // opposed to something that must hold a descriptor for an indirect call).
4924 static bool isFunctionGlobalAddress(SDValue Callee
) {
4925 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(Callee
)) {
4926 if (Callee
.getOpcode() == ISD::GlobalTLSAddress
||
4927 Callee
.getOpcode() == ISD::TargetGlobalTLSAddress
)
4930 return G
->getGlobal()->getValueType()->isFunctionTy();
4937 PrepareCall(SelectionDAG
&DAG
, SDValue
&Callee
, SDValue
&InFlag
, SDValue
&Chain
,
4938 SDValue CallSeqStart
, const SDLoc
&dl
, int SPDiff
, bool isTailCall
,
4939 bool isPatchPoint
, bool hasNest
,
4940 SmallVectorImpl
<std::pair
<unsigned, SDValue
>> &RegsToPass
,
4941 SmallVectorImpl
<SDValue
> &Ops
, std::vector
<EVT
> &NodeTys
,
4942 ImmutableCallSite CS
, const PPCSubtarget
&Subtarget
) {
4943 bool isPPC64
= Subtarget
.isPPC64();
4944 bool isSVR4ABI
= Subtarget
.isSVR4ABI();
4945 bool is64BitELFv1ABI
= isPPC64
&& isSVR4ABI
&& !Subtarget
.isELFv2ABI();
4946 bool isAIXABI
= Subtarget
.isAIXABI();
4948 EVT PtrVT
= DAG
.getTargetLoweringInfo().getPointerTy(DAG
.getDataLayout());
4949 NodeTys
.push_back(MVT::Other
); // Returns a chain
4950 NodeTys
.push_back(MVT::Glue
); // Returns a flag for retval copy to use.
4952 unsigned CallOpc
= PPCISD::CALL
;
4954 bool needIndirectCall
= true;
4955 if (!isSVR4ABI
|| !isPPC64
)
4956 if (SDNode
*Dest
= isBLACompatibleAddress(Callee
, DAG
)) {
4957 // If this is an absolute destination address, use the munged value.
4958 Callee
= SDValue(Dest
, 0);
4959 needIndirectCall
= false;
4962 // PC-relative references to external symbols should go through $stub, unless
4963 // we're building with the leopard linker or later, which automatically
4964 // synthesizes these stubs.
4965 const TargetMachine
&TM
= DAG
.getTarget();
4966 const Module
*Mod
= DAG
.getMachineFunction().getFunction().getParent();
4967 const GlobalValue
*GV
= nullptr;
4968 if (auto *G
= dyn_cast
<GlobalAddressSDNode
>(Callee
))
4969 GV
= G
->getGlobal();
4970 bool Local
= TM
.shouldAssumeDSOLocal(*Mod
, GV
);
4971 bool UsePlt
= !Local
&& Subtarget
.isTargetELF() && !isPPC64
;
4973 // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
4974 // every direct call is) turn it into a TargetGlobalAddress /
4975 // TargetExternalSymbol node so that legalize doesn't hack it.
4976 if (isFunctionGlobalAddress(Callee
)) {
4977 GlobalAddressSDNode
*G
= cast
<GlobalAddressSDNode
>(Callee
);
4979 // A call to a TLS address is actually an indirect call to a
4980 // thread-specific pointer.
4981 unsigned OpFlags
= 0;
4983 OpFlags
= PPCII::MO_PLT
;
4985 Callee
= DAG
.getTargetGlobalAddress(G
->getGlobal(), dl
,
4986 Callee
.getValueType(), 0, OpFlags
);
4987 needIndirectCall
= false;
4990 if (ExternalSymbolSDNode
*S
= dyn_cast
<ExternalSymbolSDNode
>(Callee
)) {
4991 unsigned char OpFlags
= 0;
4994 OpFlags
= PPCII::MO_PLT
;
4996 Callee
= DAG
.getTargetExternalSymbol(S
->getSymbol(), Callee
.getValueType(),
4998 needIndirectCall
= false;
5002 // We'll form an invalid direct call when lowering a patchpoint; the full
5003 // sequence for an indirect call is complicated, and many of the
5004 // instructions introduced might have side effects (and, thus, can't be
5005 // removed later). The call itself will be removed as soon as the
5006 // argument/return lowering is complete, so the fact that it has the wrong
5007 // kind of operands should not really matter.
5008 needIndirectCall
= false;
5011 if (needIndirectCall
) {
5012 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair
5013 // to do the call, we can't use PPCISD::CALL.
5014 SDValue MTCTROps
[] = {Chain
, Callee
, InFlag
};
5016 if (is64BitELFv1ABI
) {
5017 // Function pointers in the 64-bit SVR4 ABI do not point to the function
5018 // entry point, but to the function descriptor (the function entry point
5019 // address is part of the function descriptor though).
5020 // The function descriptor is a three doubleword structure with the
5021 // following fields: function entry point, TOC base address and
5022 // environment pointer.
5023 // Thus for a call through a function pointer, the following actions need
5025 // 1. Save the TOC of the caller in the TOC save area of its stack
5026 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
5027 // 2. Load the address of the function entry point from the function
5029 // 3. Load the TOC of the callee from the function descriptor into r2.
5030 // 4. Load the environment pointer from the function descriptor into
5032 // 5. Branch to the function entry point address.
5033 // 6. On return of the callee, the TOC of the caller needs to be
5034 // restored (this is done in FinishCall()).
5036 // The loads are scheduled at the beginning of the call sequence, and the
5037 // register copies are flagged together to ensure that no other
5038 // operations can be scheduled in between. E.g. without flagging the
5039 // copies together, a TOC access in the caller could be scheduled between
5040 // the assignment of the callee TOC and the branch to the callee, which
5041 // results in the TOC access going through the TOC of the callee instead
5042 // of going through the TOC of the caller, which leads to incorrect code.
5044 // Load the address of the function entry point from the function
5046 SDValue LDChain
= CallSeqStart
.getValue(CallSeqStart
->getNumValues()-1);
5047 if (LDChain
.getValueType() == MVT::Glue
)
5048 LDChain
= CallSeqStart
.getValue(CallSeqStart
->getNumValues()-2);
5050 auto MMOFlags
= Subtarget
.hasInvariantFunctionDescriptors()
5051 ? (MachineMemOperand::MODereferenceable
|
5052 MachineMemOperand::MOInvariant
)
5053 : MachineMemOperand::MONone
;
5055 MachinePointerInfo
MPI(CS
? CS
.getCalledValue() : nullptr);
5056 SDValue LoadFuncPtr
= DAG
.getLoad(MVT::i64
, dl
, LDChain
, Callee
, MPI
,
5057 /* Alignment = */ 8, MMOFlags
);
5059 // Load environment pointer into r11.
5060 SDValue PtrOff
= DAG
.getIntPtrConstant(16, dl
);
5061 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, Callee
, PtrOff
);
5062 SDValue LoadEnvPtr
=
5063 DAG
.getLoad(MVT::i64
, dl
, LDChain
, AddPtr
, MPI
.getWithOffset(16),
5064 /* Alignment = */ 8, MMOFlags
);
5066 SDValue TOCOff
= DAG
.getIntPtrConstant(8, dl
);
5067 SDValue AddTOC
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, Callee
, TOCOff
);
5069 DAG
.getLoad(MVT::i64
, dl
, LDChain
, AddTOC
, MPI
.getWithOffset(8),
5070 /* Alignment = */ 8, MMOFlags
);
5072 setUsesTOCBasePtr(DAG
);
5073 SDValue TOCVal
= DAG
.getCopyToReg(Chain
, dl
, PPC::X2
, TOCPtr
,
5075 Chain
= TOCVal
.getValue(0);
5076 InFlag
= TOCVal
.getValue(1);
5078 // If the function call has an explicit 'nest' parameter, it takes the
5079 // place of the environment pointer.
5081 SDValue EnvVal
= DAG
.getCopyToReg(Chain
, dl
, PPC::X11
, LoadEnvPtr
,
5084 Chain
= EnvVal
.getValue(0);
5085 InFlag
= EnvVal
.getValue(1);
5088 MTCTROps
[0] = Chain
;
5089 MTCTROps
[1] = LoadFuncPtr
;
5090 MTCTROps
[2] = InFlag
;
5093 Chain
= DAG
.getNode(PPCISD::MTCTR
, dl
, NodeTys
,
5094 makeArrayRef(MTCTROps
, InFlag
.getNode() ? 3 : 2));
5095 InFlag
= Chain
.getValue(1);
5098 NodeTys
.push_back(MVT::Other
);
5099 NodeTys
.push_back(MVT::Glue
);
5100 Ops
.push_back(Chain
);
5101 CallOpc
= PPCISD::BCTRL
;
5102 Callee
.setNode(nullptr);
5103 // Add use of X11 (holding environment pointer)
5104 if (is64BitELFv1ABI
&& !hasNest
)
5105 Ops
.push_back(DAG
.getRegister(PPC::X11
, PtrVT
));
5106 // Add CTR register as callee so a bctr can be emitted later.
5108 Ops
.push_back(DAG
.getRegister(isPPC64
? PPC::CTR8
: PPC::CTR
, PtrVT
));
5111 // If this is a direct call, pass the chain and the callee.
5112 if (Callee
.getNode()) {
5113 Ops
.push_back(Chain
);
5114 Ops
.push_back(Callee
);
5116 // If this is a tail call add stack pointer delta.
5118 Ops
.push_back(DAG
.getConstant(SPDiff
, dl
, MVT::i32
));
5120 // Add argument registers to the end of the list so that they are known live
5122 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
)
5123 Ops
.push_back(DAG
.getRegister(RegsToPass
[i
].first
,
5124 RegsToPass
[i
].second
.getValueType()));
5126 // All calls, in the AIX ABI and 64-bit ELF ABIs, need the TOC register
5127 // live into the call.
5128 // We do need to reserve R2/X2 to appease the verifier for the PATCHPOINT.
5129 if ((isSVR4ABI
&& isPPC64
) || isAIXABI
) {
5130 setUsesTOCBasePtr(DAG
);
5132 // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
5133 // no way to mark dependencies as implicit here.
5134 // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
5136 Ops
.push_back(DAG
.getRegister(isPPC64
? PPC::X2
5143 SDValue
PPCTargetLowering::LowerCallResult(
5144 SDValue Chain
, SDValue InFlag
, CallingConv::ID CallConv
, bool isVarArg
,
5145 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5146 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
) const {
5147 SmallVector
<CCValAssign
, 16> RVLocs
;
5148 CCState
CCRetInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), RVLocs
,
5151 CCRetInfo
.AnalyzeCallResult(
5152 Ins
, (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
5156 // Copy all of the result registers out of their specified physreg.
5157 for (unsigned i
= 0, e
= RVLocs
.size(); i
!= e
; ++i
) {
5158 CCValAssign
&VA
= RVLocs
[i
];
5159 assert(VA
.isRegLoc() && "Can only return in registers!");
5163 if (Subtarget
.hasSPE() && VA
.getLocVT() == MVT::f64
) {
5164 SDValue Lo
= DAG
.getCopyFromReg(Chain
, dl
, VA
.getLocReg(), MVT::i32
,
5166 Chain
= Lo
.getValue(1);
5167 InFlag
= Lo
.getValue(2);
5168 VA
= RVLocs
[++i
]; // skip ahead to next loc
5169 SDValue Hi
= DAG
.getCopyFromReg(Chain
, dl
, VA
.getLocReg(), MVT::i32
,
5171 Chain
= Hi
.getValue(1);
5172 InFlag
= Hi
.getValue(2);
5173 if (!Subtarget
.isLittleEndian())
5175 Val
= DAG
.getNode(PPCISD::BUILD_SPE64
, dl
, MVT::f64
, Lo
, Hi
);
5177 Val
= DAG
.getCopyFromReg(Chain
, dl
,
5178 VA
.getLocReg(), VA
.getLocVT(), InFlag
);
5179 Chain
= Val
.getValue(1);
5180 InFlag
= Val
.getValue(2);
5183 switch (VA
.getLocInfo()) {
5184 default: llvm_unreachable("Unknown loc info!");
5185 case CCValAssign::Full
: break;
5186 case CCValAssign::AExt
:
5187 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5189 case CCValAssign::ZExt
:
5190 Val
= DAG
.getNode(ISD::AssertZext
, dl
, VA
.getLocVT(), Val
,
5191 DAG
.getValueType(VA
.getValVT()));
5192 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5194 case CCValAssign::SExt
:
5195 Val
= DAG
.getNode(ISD::AssertSext
, dl
, VA
.getLocVT(), Val
,
5196 DAG
.getValueType(VA
.getValVT()));
5197 Val
= DAG
.getNode(ISD::TRUNCATE
, dl
, VA
.getValVT(), Val
);
5201 InVals
.push_back(Val
);
5207 SDValue
PPCTargetLowering::FinishCall(
5208 CallingConv::ID CallConv
, const SDLoc
&dl
, bool isTailCall
, bool isVarArg
,
5209 bool isPatchPoint
, bool hasNest
, SelectionDAG
&DAG
,
5210 SmallVector
<std::pair
<unsigned, SDValue
>, 8> &RegsToPass
, SDValue InFlag
,
5211 SDValue Chain
, SDValue CallSeqStart
, SDValue
&Callee
, int SPDiff
,
5212 unsigned NumBytes
, const SmallVectorImpl
<ISD::InputArg
> &Ins
,
5213 SmallVectorImpl
<SDValue
> &InVals
, ImmutableCallSite CS
) const {
5214 std::vector
<EVT
> NodeTys
;
5215 SmallVector
<SDValue
, 8> Ops
;
5216 unsigned CallOpc
= PrepareCall(DAG
, Callee
, InFlag
, Chain
, CallSeqStart
, dl
,
5217 SPDiff
, isTailCall
, isPatchPoint
, hasNest
,
5218 RegsToPass
, Ops
, NodeTys
, CS
, Subtarget
);
5220 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
5221 if (isVarArg
&& Subtarget
.isSVR4ABI() && !Subtarget
.isPPC64())
5222 Ops
.push_back(DAG
.getRegister(PPC::CR1EQ
, MVT::i32
));
5224 // When performing tail call optimization the callee pops its arguments off
5225 // the stack. Account for this here so these bytes can be pushed back on in
5226 // PPCFrameLowering::eliminateCallFramePseudoInstr.
5227 int BytesCalleePops
=
5228 (CallConv
== CallingConv::Fast
&&
5229 getTargetMachine().Options
.GuaranteedTailCallOpt
) ? NumBytes
: 0;
5231 // Add a register mask operand representing the call-preserved registers.
5232 const TargetRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
5233 const uint32_t *Mask
=
5234 TRI
->getCallPreservedMask(DAG
.getMachineFunction(), CallConv
);
5235 assert(Mask
&& "Missing call preserved mask for calling convention");
5236 Ops
.push_back(DAG
.getRegisterMask(Mask
));
5238 if (InFlag
.getNode())
5239 Ops
.push_back(InFlag
);
5243 assert(((Callee
.getOpcode() == ISD::Register
&&
5244 cast
<RegisterSDNode
>(Callee
)->getReg() == PPC::CTR
) ||
5245 Callee
.getOpcode() == ISD::TargetExternalSymbol
||
5246 Callee
.getOpcode() == ISD::TargetGlobalAddress
||
5247 isa
<ConstantSDNode
>(Callee
)) &&
5248 "Expecting an global address, external symbol, absolute value or register");
5250 DAG
.getMachineFunction().getFrameInfo().setHasTailCall();
5251 return DAG
.getNode(PPCISD::TC_RETURN
, dl
, MVT::Other
, Ops
);
5254 // Add a NOP immediately after the branch instruction when using the 64-bit
5255 // SVR4 or the AIX ABI.
5256 // At link time, if caller and callee are in a different module and
5257 // thus have a different TOC, the call will be replaced with a call to a stub
5258 // function which saves the current TOC, loads the TOC of the callee and
5259 // branches to the callee. The NOP will be replaced with a load instruction
5260 // which restores the TOC of the caller from the TOC save slot of the current
5261 // stack frame. If caller and callee belong to the same module (and have the
5262 // same TOC), the NOP will remain unchanged, or become some other NOP.
5264 MachineFunction
&MF
= DAG
.getMachineFunction();
5265 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5266 if (!isTailCall
&& !isPatchPoint
&&
5267 ((Subtarget
.isSVR4ABI() && Subtarget
.isPPC64()) ||
5268 Subtarget
.isAIXABI())) {
5269 if (CallOpc
== PPCISD::BCTRL
) {
5270 if (Subtarget
.isAIXABI())
5271 report_fatal_error("Indirect call on AIX is not implemented.");
5273 // This is a call through a function pointer.
5274 // Restore the caller TOC from the save area into R2.
5275 // See PrepareCall() for more information about calls through function
5276 // pointers in the 64-bit SVR4 ABI.
5277 // We are using a target-specific load with r2 hard coded, because the
5278 // result of a target-independent load would never go directly into r2,
5279 // since r2 is a reserved register (which prevents the register allocator
5280 // from allocating it), resulting in an additional register being
5281 // allocated and an unnecessary move instruction being generated.
5282 CallOpc
= PPCISD::BCTRL_LOAD_TOC
;
5284 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, PtrVT
);
5285 unsigned TOCSaveOffset
= Subtarget
.getFrameLowering()->getTOCSaveOffset();
5286 SDValue TOCOff
= DAG
.getIntPtrConstant(TOCSaveOffset
, dl
);
5287 SDValue AddTOC
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
, StackPtr
, TOCOff
);
5289 // The address needs to go after the chain input but before the flag (or
5290 // any other variadic arguments).
5291 Ops
.insert(std::next(Ops
.begin()), AddTOC
);
5292 } else if (CallOpc
== PPCISD::CALL
&&
5293 !callsShareTOCBase(&MF
.getFunction(), Callee
, DAG
.getTarget())) {
5294 // Otherwise insert NOP for non-local calls.
5295 CallOpc
= PPCISD::CALL_NOP
;
5299 if (Subtarget
.isAIXABI() && isFunctionGlobalAddress(Callee
)) {
5300 // On AIX, direct function calls reference the symbol for the function's
5301 // entry point, which is named by inserting a "." before the function's
5303 GlobalAddressSDNode
*G
= cast
<GlobalAddressSDNode
>(Callee
);
5304 auto &Context
= DAG
.getMachineFunction().getMMI().getContext();
5305 MCSymbol
*S
= Context
.getOrCreateSymbol(Twine(".") +
5306 Twine(G
->getGlobal()->getName()));
5307 Callee
= DAG
.getMCSymbol(S
, PtrVT
);
5308 // Replace the GlobalAddressSDNode Callee with the MCSymbolSDNode.
5312 Chain
= DAG
.getNode(CallOpc
, dl
, NodeTys
, Ops
);
5313 InFlag
= Chain
.getValue(1);
5315 Chain
= DAG
.getCALLSEQ_END(Chain
, DAG
.getIntPtrConstant(NumBytes
, dl
, true),
5316 DAG
.getIntPtrConstant(BytesCalleePops
, dl
, true),
5319 InFlag
= Chain
.getValue(1);
5321 return LowerCallResult(Chain
, InFlag
, CallConv
, isVarArg
,
5322 Ins
, dl
, DAG
, InVals
);
5326 PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo
&CLI
,
5327 SmallVectorImpl
<SDValue
> &InVals
) const {
5328 SelectionDAG
&DAG
= CLI
.DAG
;
5330 SmallVectorImpl
<ISD::OutputArg
> &Outs
= CLI
.Outs
;
5331 SmallVectorImpl
<SDValue
> &OutVals
= CLI
.OutVals
;
5332 SmallVectorImpl
<ISD::InputArg
> &Ins
= CLI
.Ins
;
5333 SDValue Chain
= CLI
.Chain
;
5334 SDValue Callee
= CLI
.Callee
;
5335 bool &isTailCall
= CLI
.IsTailCall
;
5336 CallingConv::ID CallConv
= CLI
.CallConv
;
5337 bool isVarArg
= CLI
.IsVarArg
;
5338 bool isPatchPoint
= CLI
.IsPatchPoint
;
5339 ImmutableCallSite CS
= CLI
.CS
;
5342 if (Subtarget
.useLongCalls() && !(CS
&& CS
.isMustTailCall()))
5344 else if (Subtarget
.isSVR4ABI() && Subtarget
.isPPC64())
5346 IsEligibleForTailCallOptimization_64SVR4(Callee
, CallConv
, CS
,
5347 isVarArg
, Outs
, Ins
, DAG
);
5349 isTailCall
= IsEligibleForTailCallOptimization(Callee
, CallConv
, isVarArg
,
5353 if (!getTargetMachine().Options
.GuaranteedTailCallOpt
)
5356 assert(isa
<GlobalAddressSDNode
>(Callee
) &&
5357 "Callee should be an llvm::Function object.");
5359 const GlobalValue
*GV
=
5360 cast
<GlobalAddressSDNode
>(Callee
)->getGlobal();
5361 const unsigned Width
=
5362 80 - strlen("TCO caller: ") - strlen(", callee linkage: 0, 0");
5363 dbgs() << "TCO caller: "
5364 << left_justify(DAG
.getMachineFunction().getName(), Width
)
5365 << ", callee linkage: " << GV
->getVisibility() << ", "
5366 << GV
->getLinkage() << "\n");
5370 if (!isTailCall
&& CS
&& CS
.isMustTailCall())
5371 report_fatal_error("failed to perform tail call elimination on a call "
5372 "site marked musttail");
5374 // When long calls (i.e. indirect calls) are always used, calls are always
5375 // made via function pointer. If we have a function name, first translate it
5377 if (Subtarget
.useLongCalls() && isa
<GlobalAddressSDNode
>(Callee
) &&
5379 Callee
= LowerGlobalAddress(Callee
, DAG
);
5381 if (Subtarget
.isSVR4ABI() && Subtarget
.isPPC64())
5382 return LowerCall_64SVR4(Chain
, Callee
, CallConv
, isVarArg
,
5383 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5384 dl
, DAG
, InVals
, CS
);
5386 if (Subtarget
.isSVR4ABI())
5387 return LowerCall_32SVR4(Chain
, Callee
, CallConv
, isVarArg
,
5388 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5389 dl
, DAG
, InVals
, CS
);
5391 if (Subtarget
.isAIXABI())
5392 return LowerCall_AIX(Chain
, Callee
, CallConv
, isVarArg
,
5393 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5394 dl
, DAG
, InVals
, CS
);
5396 return LowerCall_Darwin(Chain
, Callee
, CallConv
, isVarArg
,
5397 isTailCall
, isPatchPoint
, Outs
, OutVals
, Ins
,
5398 dl
, DAG
, InVals
, CS
);
5401 SDValue
PPCTargetLowering::LowerCall_32SVR4(
5402 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
5403 bool isTailCall
, bool isPatchPoint
,
5404 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
5405 const SmallVectorImpl
<SDValue
> &OutVals
,
5406 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5407 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
5408 ImmutableCallSite CS
) const {
5409 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5410 // of the 32-bit SVR4 ABI stack frame layout.
5412 assert((CallConv
== CallingConv::C
||
5413 CallConv
== CallingConv::Cold
||
5414 CallConv
== CallingConv::Fast
) && "Unknown calling convention!");
5416 unsigned PtrByteSize
= 4;
5418 MachineFunction
&MF
= DAG
.getMachineFunction();
5420 // Mark this function as potentially containing a function that contains a
5421 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5422 // and restoring the callers stack pointer in this functions epilog. This is
5423 // done because by tail calling the called function might overwrite the value
5424 // in this function's (MF) stack pointer stack slot 0(SP).
5425 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5426 CallConv
== CallingConv::Fast
)
5427 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
5429 // Count how many bytes are to be pushed on the stack, including the linkage
5430 // area, parameter list area and the part of the local variable space which
5431 // contains copies of aggregates which are passed by value.
5433 // Assign locations to all of the outgoing arguments.
5434 SmallVector
<CCValAssign
, 16> ArgLocs
;
5435 PPCCCState
CCInfo(CallConv
, isVarArg
, MF
, ArgLocs
, *DAG
.getContext());
5437 // Reserve space for the linkage area on the stack.
5438 CCInfo
.AllocateStack(Subtarget
.getFrameLowering()->getLinkageSize(),
5441 CCInfo
.PreAnalyzeCallOperands(Outs
);
5444 // Handle fixed and variable vector arguments differently.
5445 // Fixed vector arguments go into registers as long as registers are
5446 // available. Variable vector arguments always go into memory.
5447 unsigned NumArgs
= Outs
.size();
5449 for (unsigned i
= 0; i
!= NumArgs
; ++i
) {
5450 MVT ArgVT
= Outs
[i
].VT
;
5451 ISD::ArgFlagsTy ArgFlags
= Outs
[i
].Flags
;
5454 if (Outs
[i
].IsFixed
) {
5455 Result
= CC_PPC32_SVR4(i
, ArgVT
, ArgVT
, CCValAssign::Full
, ArgFlags
,
5458 Result
= CC_PPC32_SVR4_VarArg(i
, ArgVT
, ArgVT
, CCValAssign::Full
,
5464 errs() << "Call operand #" << i
<< " has unhandled type "
5465 << EVT(ArgVT
).getEVTString() << "\n";
5467 llvm_unreachable(nullptr);
5471 // All arguments are treated the same.
5472 CCInfo
.AnalyzeCallOperands(Outs
, CC_PPC32_SVR4
);
5474 CCInfo
.clearWasPPCF128();
5476 // Assign locations to all of the outgoing aggregate by value arguments.
5477 SmallVector
<CCValAssign
, 16> ByValArgLocs
;
5478 CCState
CCByValInfo(CallConv
, isVarArg
, MF
, ByValArgLocs
, *DAG
.getContext());
5480 // Reserve stack space for the allocations in CCInfo.
5481 CCByValInfo
.AllocateStack(CCInfo
.getNextStackOffset(), PtrByteSize
);
5483 CCByValInfo
.AnalyzeCallOperands(Outs
, CC_PPC32_SVR4_ByVal
);
5485 // Size of the linkage area, parameter list area and the part of the local
5486 // space variable where copies of aggregates which are passed by value are
5488 unsigned NumBytes
= CCByValInfo
.getNextStackOffset();
5490 // Calculate by how many bytes the stack has to be adjusted in case of tail
5491 // call optimization.
5492 int SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
5494 // Adjust the stack pointer for the new arguments...
5495 // These operations are automatically eliminated by the prolog/epilog pass
5496 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
5497 SDValue CallSeqStart
= Chain
;
5499 // Load the return address and frame pointer so it can be moved somewhere else
5502 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
5504 // Set up a copy of the stack pointer for use loading and storing any
5505 // arguments that may not fit in the registers available for argument
5507 SDValue StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
5509 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
5510 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
5511 SmallVector
<SDValue
, 8> MemOpChains
;
5513 bool seenFloatArg
= false;
5514 // Walk the register/memloc assignments, inserting copies/loads.
5515 // i - Tracks the index into the list of registers allocated for the call
5516 // RealArgIdx - Tracks the index into the list of actual function arguments
5517 // j - Tracks the index into the list of byval arguments
5518 for (unsigned i
= 0, RealArgIdx
= 0, j
= 0, e
= ArgLocs
.size();
5520 ++i
, ++RealArgIdx
) {
5521 CCValAssign
&VA
= ArgLocs
[i
];
5522 SDValue Arg
= OutVals
[RealArgIdx
];
5523 ISD::ArgFlagsTy Flags
= Outs
[RealArgIdx
].Flags
;
5525 if (Flags
.isByVal()) {
5526 // Argument is an aggregate which is passed by value, thus we need to
5527 // create a copy of it in the local variable space of the current stack
5528 // frame (which is the stack frame of the caller) and pass the address of
5529 // this copy to the callee.
5530 assert((j
< ByValArgLocs
.size()) && "Index out of bounds!");
5531 CCValAssign
&ByValVA
= ByValArgLocs
[j
++];
5532 assert((VA
.getValNo() == ByValVA
.getValNo()) && "ValNo mismatch!");
5534 // Memory reserved in the local variable space of the callers stack frame.
5535 unsigned LocMemOffset
= ByValVA
.getLocMemOffset();
5537 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
, dl
);
5538 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(MF
.getDataLayout()),
5541 // Create a copy of the argument in the local area of the current
5543 SDValue MemcpyCall
=
5544 CreateCopyOfByValArgument(Arg
, PtrOff
,
5545 CallSeqStart
.getNode()->getOperand(0),
5548 // This must go outside the CALLSEQ_START..END.
5549 SDValue NewCallSeqStart
= DAG
.getCALLSEQ_START(MemcpyCall
, NumBytes
, 0,
5551 DAG
.ReplaceAllUsesWith(CallSeqStart
.getNode(),
5552 NewCallSeqStart
.getNode());
5553 Chain
= CallSeqStart
= NewCallSeqStart
;
5555 // Pass the address of the aggregate copy on the stack either in a
5556 // physical register or in the parameter list area of the current stack
5557 // frame to the callee.
5561 // When useCRBits() is true, there can be i1 arguments.
5562 // It is because getRegisterType(MVT::i1) => MVT::i1,
5563 // and for other integer types getRegisterType() => MVT::i32.
5564 // Extend i1 and ensure callee will get i32.
5565 if (Arg
.getValueType() == MVT::i1
)
5566 Arg
= DAG
.getNode(Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
,
5569 if (VA
.isRegLoc()) {
5570 seenFloatArg
|= VA
.getLocVT().isFloatingPoint();
5571 // Put argument in a physical register.
5572 if (Subtarget
.hasSPE() && Arg
.getValueType() == MVT::f64
) {
5573 bool IsLE
= Subtarget
.isLittleEndian();
5574 SDValue SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
5575 DAG
.getIntPtrConstant(IsLE
? 0 : 1, dl
));
5576 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), SVal
.getValue(0)));
5577 SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
5578 DAG
.getIntPtrConstant(IsLE
? 1 : 0, dl
));
5579 RegsToPass
.push_back(std::make_pair(ArgLocs
[++i
].getLocReg(),
5582 RegsToPass
.push_back(std::make_pair(VA
.getLocReg(), Arg
));
5584 // Put argument in the parameter list area of the current stack frame.
5585 assert(VA
.isMemLoc());
5586 unsigned LocMemOffset
= VA
.getLocMemOffset();
5589 SDValue PtrOff
= DAG
.getIntPtrConstant(LocMemOffset
, dl
);
5590 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, getPointerTy(MF
.getDataLayout()),
5593 MemOpChains
.push_back(
5594 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo()));
5596 // Calculate and remember argument location.
5597 CalculateTailCallArgDest(DAG
, MF
, false, Arg
, SPDiff
, LocMemOffset
,
5603 if (!MemOpChains
.empty())
5604 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
5606 // Build a sequence of copy-to-reg nodes chained together with token chain
5607 // and flag operands which copy the outgoing args into the appropriate regs.
5609 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
5610 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
5611 RegsToPass
[i
].second
, InFlag
);
5612 InFlag
= Chain
.getValue(1);
5615 // Set CR bit 6 to true if this is a vararg call with floating args passed in
5618 SDVTList VTs
= DAG
.getVTList(MVT::Other
, MVT::Glue
);
5619 SDValue Ops
[] = { Chain
, InFlag
};
5621 Chain
= DAG
.getNode(seenFloatArg
? PPCISD::CR6SET
: PPCISD::CR6UNSET
,
5622 dl
, VTs
, makeArrayRef(Ops
, InFlag
.getNode() ? 2 : 1));
5624 InFlag
= Chain
.getValue(1);
5628 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
5631 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
5632 /* unused except on PPC64 ELFv1 */ false, DAG
,
5633 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
5634 NumBytes
, Ins
, InVals
, CS
);
5637 // Copy an argument into memory, being careful to do this outside the
5638 // call sequence for the call to which the argument belongs.
5639 SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(
5640 SDValue Arg
, SDValue PtrOff
, SDValue CallSeqStart
, ISD::ArgFlagsTy Flags
,
5641 SelectionDAG
&DAG
, const SDLoc
&dl
) const {
5642 SDValue MemcpyCall
= CreateCopyOfByValArgument(Arg
, PtrOff
,
5643 CallSeqStart
.getNode()->getOperand(0),
5645 // The MEMCPY must go outside the CALLSEQ_START..END.
5646 int64_t FrameSize
= CallSeqStart
.getConstantOperandVal(1);
5647 SDValue NewCallSeqStart
= DAG
.getCALLSEQ_START(MemcpyCall
, FrameSize
, 0,
5649 DAG
.ReplaceAllUsesWith(CallSeqStart
.getNode(),
5650 NewCallSeqStart
.getNode());
5651 return NewCallSeqStart
;
5654 SDValue
PPCTargetLowering::LowerCall_64SVR4(
5655 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
5656 bool isTailCall
, bool isPatchPoint
,
5657 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
5658 const SmallVectorImpl
<SDValue
> &OutVals
,
5659 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
5660 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
5661 ImmutableCallSite CS
) const {
5662 bool isELFv2ABI
= Subtarget
.isELFv2ABI();
5663 bool isLittleEndian
= Subtarget
.isLittleEndian();
5664 unsigned NumOps
= Outs
.size();
5665 bool hasNest
= false;
5666 bool IsSibCall
= false;
5668 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
5669 unsigned PtrByteSize
= 8;
5671 MachineFunction
&MF
= DAG
.getMachineFunction();
5673 if (isTailCall
&& !getTargetMachine().Options
.GuaranteedTailCallOpt
)
5676 // Mark this function as potentially containing a function that contains a
5677 // tail call. As a consequence the frame pointer will be used for dynamicalloc
5678 // and restoring the callers stack pointer in this functions epilog. This is
5679 // done because by tail calling the called function might overwrite the value
5680 // in this function's (MF) stack pointer stack slot 0(SP).
5681 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5682 CallConv
== CallingConv::Fast
)
5683 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
5685 assert(!(CallConv
== CallingConv::Fast
&& isVarArg
) &&
5686 "fastcc not supported on varargs functions");
5688 // Count how many bytes are to be pushed on the stack, including the linkage
5689 // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5690 // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5691 // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5692 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
5693 unsigned NumBytes
= LinkageSize
;
5694 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
5695 unsigned &QFPR_idx
= FPR_idx
;
5697 static const MCPhysReg GPR
[] = {
5698 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
5699 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
5701 static const MCPhysReg VR
[] = {
5702 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
5703 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
5706 const unsigned NumGPRs
= array_lengthof(GPR
);
5707 const unsigned NumFPRs
= useSoftFloat() ? 0 : 13;
5708 const unsigned NumVRs
= array_lengthof(VR
);
5709 const unsigned NumQFPRs
= NumFPRs
;
5711 // On ELFv2, we can avoid allocating the parameter area if all the arguments
5712 // can be passed to the callee in registers.
5713 // For the fast calling convention, there is another check below.
5714 // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5715 bool HasParameterArea
= !isELFv2ABI
|| isVarArg
|| CallConv
== CallingConv::Fast
;
5716 if (!HasParameterArea
) {
5717 unsigned ParamAreaSize
= NumGPRs
* PtrByteSize
;
5718 unsigned AvailableFPRs
= NumFPRs
;
5719 unsigned AvailableVRs
= NumVRs
;
5720 unsigned NumBytesTmp
= NumBytes
;
5721 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5722 if (Outs
[i
].Flags
.isNest()) continue;
5723 if (CalculateStackSlotUsed(Outs
[i
].VT
, Outs
[i
].ArgVT
, Outs
[i
].Flags
,
5724 PtrByteSize
, LinkageSize
, ParamAreaSize
,
5725 NumBytesTmp
, AvailableFPRs
, AvailableVRs
,
5726 Subtarget
.hasQPX()))
5727 HasParameterArea
= true;
5731 // When using the fast calling convention, we don't provide backing for
5732 // arguments that will be in registers.
5733 unsigned NumGPRsUsed
= 0, NumFPRsUsed
= 0, NumVRsUsed
= 0;
5735 // Avoid allocating parameter area for fastcc functions if all the arguments
5736 // can be passed in the registers.
5737 if (CallConv
== CallingConv::Fast
)
5738 HasParameterArea
= false;
5740 // Add up all the space actually used.
5741 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5742 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5743 EVT ArgVT
= Outs
[i
].VT
;
5744 EVT OrigVT
= Outs
[i
].ArgVT
;
5749 if (CallConv
== CallingConv::Fast
) {
5750 if (Flags
.isByVal()) {
5751 NumGPRsUsed
+= (Flags
.getByValSize()+7)/8;
5752 if (NumGPRsUsed
> NumGPRs
)
5753 HasParameterArea
= true;
5755 switch (ArgVT
.getSimpleVT().SimpleTy
) {
5756 default: llvm_unreachable("Unexpected ValueType for argument!");
5760 if (++NumGPRsUsed
<= NumGPRs
)
5770 if (++NumVRsUsed
<= NumVRs
)
5774 // When using QPX, this is handled like a FP register, otherwise, it
5775 // is an Altivec register.
5776 if (Subtarget
.hasQPX()) {
5777 if (++NumFPRsUsed
<= NumFPRs
)
5780 if (++NumVRsUsed
<= NumVRs
)
5786 case MVT::v4f64
: // QPX
5787 case MVT::v4i1
: // QPX
5788 if (++NumFPRsUsed
<= NumFPRs
)
5792 HasParameterArea
= true;
5796 /* Respect alignment of argument on the stack. */
5798 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5799 NumBytes
= ((NumBytes
+ Align
- 1) / Align
) * Align
;
5801 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
5802 if (Flags
.isInConsecutiveRegsLast())
5803 NumBytes
= ((NumBytes
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
5806 unsigned NumBytesActuallyUsed
= NumBytes
;
5808 // In the old ELFv1 ABI,
5809 // the prolog code of the callee may store up to 8 GPR argument registers to
5810 // the stack, allowing va_start to index over them in memory if its varargs.
5811 // Because we cannot tell if this is needed on the caller side, we have to
5812 // conservatively assume that it is needed. As such, make sure we have at
5813 // least enough stack space for the caller to store the 8 GPRs.
5814 // In the ELFv2 ABI, we allocate the parameter area iff a callee
5815 // really requires memory operands, e.g. a vararg function.
5816 if (HasParameterArea
)
5817 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
5819 NumBytes
= LinkageSize
;
5821 // Tail call needs the stack to be aligned.
5822 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
5823 CallConv
== CallingConv::Fast
)
5824 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
5828 // Calculate by how many bytes the stack has to be adjusted in case of tail
5829 // call optimization.
5831 SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
5833 // To protect arguments on the stack from being clobbered in a tail call,
5834 // force all the loads to happen before doing any other lowering.
5836 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
5838 // Adjust the stack pointer for the new arguments...
5839 // These operations are automatically eliminated by the prolog/epilog pass
5841 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
5842 SDValue CallSeqStart
= Chain
;
5844 // Load the return address and frame pointer so it can be move somewhere else
5847 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
5849 // Set up a copy of the stack pointer for use loading and storing any
5850 // arguments that may not fit in the registers available for argument
5852 SDValue StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
5854 // Figure out which arguments are going to go in registers, and which in
5855 // memory. Also, if this is a vararg function, floating point operations
5856 // must be stored to our stack, and loaded into integer regs as well, if
5857 // any integer regs are available for argument passing.
5858 unsigned ArgOffset
= LinkageSize
;
5860 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
5861 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
5863 SmallVector
<SDValue
, 8> MemOpChains
;
5864 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
5865 SDValue Arg
= OutVals
[i
];
5866 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
5867 EVT ArgVT
= Outs
[i
].VT
;
5868 EVT OrigVT
= Outs
[i
].ArgVT
;
5870 // PtrOff will be used to store the current argument to the stack if a
5871 // register cannot be found for it.
5874 // We re-align the argument offset for each argument, except when using the
5875 // fast calling convention, when we need to make sure we do that only when
5876 // we'll actually use a stack slot.
5877 auto ComputePtrOff
= [&]() {
5878 /* Respect alignment of argument on the stack. */
5880 CalculateStackSlotAlignment(ArgVT
, OrigVT
, Flags
, PtrByteSize
);
5881 ArgOffset
= ((ArgOffset
+ Align
- 1) / Align
) * Align
;
5883 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
5885 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
5888 if (CallConv
!= CallingConv::Fast
) {
5891 /* Compute GPR index associated with argument offset. */
5892 GPR_idx
= (ArgOffset
- LinkageSize
) / PtrByteSize
;
5893 GPR_idx
= std::min(GPR_idx
, NumGPRs
);
5896 // Promote integers to 64-bit values.
5897 if (Arg
.getValueType() == MVT::i32
|| Arg
.getValueType() == MVT::i1
) {
5898 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
5899 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
5900 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
5903 // FIXME memcpy is used way more than necessary. Correctness first.
5904 // Note: "by value" is code for passing a structure by value, not
5906 if (Flags
.isByVal()) {
5907 // Note: Size includes alignment padding, so
5908 // struct x { short a; char b; }
5909 // will have Size = 4. With #pragma pack(1), it will have Size = 3.
5910 // These are the proper values we need for right-justifying the
5911 // aggregate in a parameter register.
5912 unsigned Size
= Flags
.getByValSize();
5914 // An empty aggregate parameter takes up no storage and no
5919 if (CallConv
== CallingConv::Fast
)
5922 // All aggregates smaller than 8 bytes must be passed right-justified.
5923 if (Size
==1 || Size
==2 || Size
==4) {
5924 EVT VT
= (Size
==1) ? MVT::i8
: ((Size
==2) ? MVT::i16
: MVT::i32
);
5925 if (GPR_idx
!= NumGPRs
) {
5926 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
5927 MachinePointerInfo(), VT
);
5928 MemOpChains
.push_back(Load
.getValue(1));
5929 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
5931 ArgOffset
+= PtrByteSize
;
5936 if (GPR_idx
== NumGPRs
&& Size
< 8) {
5937 SDValue AddPtr
= PtrOff
;
5938 if (!isLittleEndian
) {
5939 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
5940 PtrOff
.getValueType());
5941 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
5943 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
5946 ArgOffset
+= PtrByteSize
;
5949 // Copy entire object into memory. There are cases where gcc-generated
5950 // code assumes it is there, even if it could be put entirely into
5951 // registers. (This is not what the doc says.)
5953 // FIXME: The above statement is likely due to a misunderstanding of the
5954 // documents. All arguments must be copied into the parameter area BY
5955 // THE CALLEE in the event that the callee takes the address of any
5956 // formal argument. That has not yet been implemented. However, it is
5957 // reasonable to use the stack area as a staging area for the register
5960 // Skip this for small aggregates, as we will use the same slot for a
5961 // right-justified copy, below.
5963 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
5967 // When a register is available, pass a small aggregate right-justified.
5968 if (Size
< 8 && GPR_idx
!= NumGPRs
) {
5969 // The easiest way to get this right-justified in a register
5970 // is to copy the structure into the rightmost portion of a
5971 // local variable slot, then load the whole slot into the
5973 // FIXME: The memcpy seems to produce pretty awful code for
5974 // small aggregates, particularly for packed ones.
5975 // FIXME: It would be preferable to use the slot in the
5976 // parameter save area instead of a new local variable.
5977 SDValue AddPtr
= PtrOff
;
5978 if (!isLittleEndian
) {
5979 SDValue Const
= DAG
.getConstant(8 - Size
, dl
, PtrOff
.getValueType());
5980 AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
5982 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
5986 // Load the slot into the register.
5988 DAG
.getLoad(PtrVT
, dl
, Chain
, PtrOff
, MachinePointerInfo());
5989 MemOpChains
.push_back(Load
.getValue(1));
5990 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
5992 // Done with this argument.
5993 ArgOffset
+= PtrByteSize
;
5997 // For aggregates larger than PtrByteSize, copy the pieces of the
5998 // object that fit into registers from the parameter save area.
5999 for (unsigned j
=0; j
<Size
; j
+=PtrByteSize
) {
6000 SDValue Const
= DAG
.getConstant(j
, dl
, PtrOff
.getValueType());
6001 SDValue AddArg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Arg
, Const
);
6002 if (GPR_idx
!= NumGPRs
) {
6004 DAG
.getLoad(PtrVT
, dl
, Chain
, AddArg
, MachinePointerInfo());
6005 MemOpChains
.push_back(Load
.getValue(1));
6006 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6007 ArgOffset
+= PtrByteSize
;
6009 ArgOffset
+= ((Size
- j
+ PtrByteSize
-1)/PtrByteSize
)*PtrByteSize
;
6016 switch (Arg
.getSimpleValueType().SimpleTy
) {
6017 default: llvm_unreachable("Unexpected ValueType for argument!");
6021 if (Flags
.isNest()) {
6022 // The 'nest' parameter, if any, is passed in R11.
6023 RegsToPass
.push_back(std::make_pair(PPC::X11
, Arg
));
6028 // These can be scalar arguments or elements of an integer array type
6029 // passed directly. Clang may use those instead of "byval" aggregate
6030 // types to avoid forcing arguments to memory unnecessarily.
6031 if (GPR_idx
!= NumGPRs
) {
6032 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6034 if (CallConv
== CallingConv::Fast
)
6037 assert(HasParameterArea
&&
6038 "Parameter area must exist to pass an argument in memory.");
6039 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6040 true, isTailCall
, false, MemOpChains
,
6041 TailCallArguments
, dl
);
6042 if (CallConv
== CallingConv::Fast
)
6043 ArgOffset
+= PtrByteSize
;
6045 if (CallConv
!= CallingConv::Fast
)
6046 ArgOffset
+= PtrByteSize
;
6050 // These can be scalar arguments or elements of a float array type
6051 // passed directly. The latter are used to implement ELFv2 homogeneous
6052 // float aggregates.
6054 // Named arguments go into FPRs first, and once they overflow, the
6055 // remaining arguments go into GPRs and then the parameter save area.
6056 // Unnamed arguments for vararg functions always go to GPRs and
6057 // then the parameter save area. For now, put all arguments to vararg
6058 // routines always in both locations (FPR *and* GPR or stack slot).
6059 bool NeedGPROrStack
= isVarArg
|| FPR_idx
== NumFPRs
;
6060 bool NeededLoad
= false;
6062 // First load the argument into the next available FPR.
6063 if (FPR_idx
!= NumFPRs
)
6064 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6066 // Next, load the argument into GPR or stack slot if needed.
6067 if (!NeedGPROrStack
)
6069 else if (GPR_idx
!= NumGPRs
&& CallConv
!= CallingConv::Fast
) {
6070 // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6071 // once we support fp <-> gpr moves.
6073 // In the non-vararg case, this can only ever happen in the
6074 // presence of f32 array types, since otherwise we never run
6075 // out of FPRs before running out of GPRs.
6078 // Double values are always passed in a single GPR.
6079 if (Arg
.getValueType() != MVT::f32
) {
6080 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i64
, Arg
);
6082 // Non-array float values are extended and passed in a GPR.
6083 } else if (!Flags
.isInConsecutiveRegs()) {
6084 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6085 ArgVal
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i64
, ArgVal
);
6087 // If we have an array of floats, we collect every odd element
6088 // together with its predecessor into one GPR.
6089 } else if (ArgOffset
% PtrByteSize
!= 0) {
6091 Lo
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, OutVals
[i
- 1]);
6092 Hi
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6093 if (!isLittleEndian
)
6095 ArgVal
= DAG
.getNode(ISD::BUILD_PAIR
, dl
, MVT::i64
, Lo
, Hi
);
6097 // The final element, if even, goes into the first half of a GPR.
6098 } else if (Flags
.isInConsecutiveRegsLast()) {
6099 ArgVal
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::i32
, Arg
);
6100 ArgVal
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i64
, ArgVal
);
6101 if (!isLittleEndian
)
6102 ArgVal
= DAG
.getNode(ISD::SHL
, dl
, MVT::i64
, ArgVal
,
6103 DAG
.getConstant(32, dl
, MVT::i32
));
6105 // Non-final even elements are skipped; they will be handled
6106 // together with the subsequent argument on the next go-around.
6110 if (ArgVal
.getNode())
6111 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], ArgVal
));
6113 if (CallConv
== CallingConv::Fast
)
6116 // Single-precision floating-point values are mapped to the
6117 // second (rightmost) word of the stack doubleword.
6118 if (Arg
.getValueType() == MVT::f32
&&
6119 !isLittleEndian
&& !Flags
.isInConsecutiveRegs()) {
6120 SDValue ConstFour
= DAG
.getConstant(4, dl
, PtrOff
.getValueType());
6121 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, ConstFour
);
6124 assert(HasParameterArea
&&
6125 "Parameter area must exist to pass an argument in memory.");
6126 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6127 true, isTailCall
, false, MemOpChains
,
6128 TailCallArguments
, dl
);
6132 // When passing an array of floats, the array occupies consecutive
6133 // space in the argument area; only round up to the next doubleword
6134 // at the end of the array. Otherwise, each float takes 8 bytes.
6135 if (CallConv
!= CallingConv::Fast
|| NeededLoad
) {
6136 ArgOffset
+= (Arg
.getValueType() == MVT::f32
&&
6137 Flags
.isInConsecutiveRegs()) ? 4 : 8;
6138 if (Flags
.isInConsecutiveRegsLast())
6139 ArgOffset
= ((ArgOffset
+ PtrByteSize
- 1)/PtrByteSize
) * PtrByteSize
;
6151 if (!Subtarget
.hasQPX()) {
6152 // These can be scalar arguments or elements of a vector array type
6153 // passed directly. The latter are used to implement ELFv2 homogeneous
6154 // vector aggregates.
6156 // For a varargs call, named arguments go into VRs or on the stack as
6157 // usual; unnamed arguments always go to the stack or the corresponding
6158 // GPRs when within range. For now, we always put the value in both
6159 // locations (or even all three).
6161 assert(HasParameterArea
&&
6162 "Parameter area must exist if we have a varargs call.");
6163 // We could elide this store in the case where the object fits
6164 // entirely in R registers. Maybe later.
6166 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6167 MemOpChains
.push_back(Store
);
6168 if (VR_idx
!= NumVRs
) {
6170 DAG
.getLoad(MVT::v4f32
, dl
, Store
, PtrOff
, MachinePointerInfo());
6171 MemOpChains
.push_back(Load
.getValue(1));
6172 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Load
));
6175 for (unsigned i
=0; i
<16; i
+=PtrByteSize
) {
6176 if (GPR_idx
== NumGPRs
)
6178 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6179 DAG
.getConstant(i
, dl
, PtrVT
));
6181 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6182 MemOpChains
.push_back(Load
.getValue(1));
6183 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6188 // Non-varargs Altivec params go into VRs or on the stack.
6189 if (VR_idx
!= NumVRs
) {
6190 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Arg
));
6192 if (CallConv
== CallingConv::Fast
)
6195 assert(HasParameterArea
&&
6196 "Parameter area must exist to pass an argument in memory.");
6197 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6198 true, isTailCall
, true, MemOpChains
,
6199 TailCallArguments
, dl
);
6200 if (CallConv
== CallingConv::Fast
)
6204 if (CallConv
!= CallingConv::Fast
)
6209 assert(Arg
.getValueType().getSimpleVT().SimpleTy
== MVT::v4f32
&&
6210 "Invalid QPX parameter type");
6215 bool IsF32
= Arg
.getValueType().getSimpleVT().SimpleTy
== MVT::v4f32
;
6217 assert(HasParameterArea
&&
6218 "Parameter area must exist if we have a varargs call.");
6219 // We could elide this store in the case where the object fits
6220 // entirely in R registers. Maybe later.
6222 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6223 MemOpChains
.push_back(Store
);
6224 if (QFPR_idx
!= NumQFPRs
) {
6225 SDValue Load
= DAG
.getLoad(IsF32
? MVT::v4f32
: MVT::v4f64
, dl
, Store
,
6226 PtrOff
, MachinePointerInfo());
6227 MemOpChains
.push_back(Load
.getValue(1));
6228 RegsToPass
.push_back(std::make_pair(QFPR
[QFPR_idx
++], Load
));
6230 ArgOffset
+= (IsF32
? 16 : 32);
6231 for (unsigned i
= 0; i
< (IsF32
? 16U : 32U); i
+= PtrByteSize
) {
6232 if (GPR_idx
== NumGPRs
)
6234 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6235 DAG
.getConstant(i
, dl
, PtrVT
));
6237 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6238 MemOpChains
.push_back(Load
.getValue(1));
6239 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6244 // Non-varargs QPX params go into registers or on the stack.
6245 if (QFPR_idx
!= NumQFPRs
) {
6246 RegsToPass
.push_back(std::make_pair(QFPR
[QFPR_idx
++], Arg
));
6248 if (CallConv
== CallingConv::Fast
)
6251 assert(HasParameterArea
&&
6252 "Parameter area must exist to pass an argument in memory.");
6253 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6254 true, isTailCall
, true, MemOpChains
,
6255 TailCallArguments
, dl
);
6256 if (CallConv
== CallingConv::Fast
)
6257 ArgOffset
+= (IsF32
? 16 : 32);
6260 if (CallConv
!= CallingConv::Fast
)
6261 ArgOffset
+= (IsF32
? 16 : 32);
6267 assert((!HasParameterArea
|| NumBytesActuallyUsed
== ArgOffset
) &&
6268 "mismatch in size of parameter area");
6269 (void)NumBytesActuallyUsed
;
6271 if (!MemOpChains
.empty())
6272 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
6274 // Check if this is an indirect call (MTCTR/BCTRL).
6275 // See PrepareCall() for more information about calls through function
6276 // pointers in the 64-bit SVR4 ABI.
6277 if (!isTailCall
&& !isPatchPoint
&&
6278 !isFunctionGlobalAddress(Callee
) &&
6279 !isa
<ExternalSymbolSDNode
>(Callee
)) {
6280 // Load r2 into a virtual register and store it to the TOC save area.
6281 setUsesTOCBasePtr(DAG
);
6282 SDValue Val
= DAG
.getCopyFromReg(Chain
, dl
, PPC::X2
, MVT::i64
);
6283 // TOC save area offset.
6284 unsigned TOCSaveOffset
= Subtarget
.getFrameLowering()->getTOCSaveOffset();
6285 SDValue PtrOff
= DAG
.getIntPtrConstant(TOCSaveOffset
, dl
);
6286 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
6287 Chain
= DAG
.getStore(
6288 Val
.getValue(1), dl
, Val
, AddPtr
,
6289 MachinePointerInfo::getStack(DAG
.getMachineFunction(), TOCSaveOffset
));
6290 // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6291 // This does not mean the MTCTR instruction must use R12; it's easier
6292 // to model this as an extra parameter, so do that.
6293 if (isELFv2ABI
&& !isPatchPoint
)
6294 RegsToPass
.push_back(std::make_pair((unsigned)PPC::X12
, Callee
));
6297 // Build a sequence of copy-to-reg nodes chained together with token chain
6298 // and flag operands which copy the outgoing args into the appropriate regs.
6300 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
6301 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
6302 RegsToPass
[i
].second
, InFlag
);
6303 InFlag
= Chain
.getValue(1);
6306 if (isTailCall
&& !IsSibCall
)
6307 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
6310 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
, hasNest
,
6311 DAG
, RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
,
6312 SPDiff
, NumBytes
, Ins
, InVals
, CS
);
/// Lower an outgoing call using the Darwin PowerPC argument-passing rules.
///
/// The pointer type from the data layout selects 32- or 64-bit mode
/// (PtrByteSize 4 or 8, GPR_32 or GPR_64, R1 or X1 as stack pointer).
/// The visible steps are:
///   1. Size the outgoing area: start at the linkage size, add one
///      CalculateStackSlotSize() per argument (with 16-byte rounding for
///      vector arguments), and enforce a floor of LinkageSize + 8 * PtrByteSize.
///   2. Open the call sequence with getCALLSEQ_START and remember that node in
///      CallSeqStart.
///   3. Walk the arguments, queuing register copies in RegsToPass and memory
///      operations in MemOpChains while advancing ArgOffset.
///   4. Join the memory operations with a TokenFactor, emit the glued
///      CopyToReg nodes, and return the result of FinishCall().
///
/// \param Chain        Incoming token chain.
/// \param Callee       Call target; pushed as an extra R12/X12 register
///                     argument when it is neither a global address, an
///                     external symbol, nor BLA-compatible (part of that
///                     condition is not visible here).
/// \param CallConv     Calling convention; CallingConv::Fast together with
///                     GuaranteedTailCallOpt changes stack alignment handling.
/// \param isVarArg     Whether the call is variadic.
/// \param isTailCall   Whether the call is being lowered as a tail call.
/// \param isPatchPoint Forwarded to FinishCall().
/// \param Outs         Per-argument flags and value types.
/// \param OutVals      Per-argument values, parallel to Outs.
/// \param Ins, InVals, CS  Forwarded to FinishCall().
/// \param dl           Debug location used for every node created here.
/// \param DAG          SelectionDAG the nodes are created in.
/// \returns The SDValue produced by FinishCall().
///
/// NOTE(review): several names used below (PtrOff, InFlag, FPR, LROp, FPOp,
/// Store, Load) have no visible declaration in this listing; confirm against
/// the full file.
6315 SDValue
PPCTargetLowering::LowerCall_Darwin(
6316 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
6317 bool isTailCall
, bool isPatchPoint
,
6318 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6319 const SmallVectorImpl
<SDValue
> &OutVals
,
6320 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
6321 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
6322 ImmutableCallSite CS
) const {
6323 unsigned NumOps
= Outs
.size();
// Pointer width decides 32- vs 64-bit lowering for the rest of the function.
6325 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6326 bool isPPC64
= PtrVT
== MVT::i64
;
6327 unsigned PtrByteSize
= isPPC64
? 8 : 4;
6329 MachineFunction
&MF
= DAG
.getMachineFunction();
6331 // Mark this function as potentially containing a function that contains a
6332 // tail call. As a consequence the frame pointer will be used for dynamicalloc
6333 // and restoring the callers stack pointer in this functions epilog. This is
6334 // done because by tail calling the called function might overwrite the value
6335 // in this function's (MF) stack pointer stack slot 0(SP).
6336 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
6337 CallConv
== CallingConv::Fast
)
6338 MF
.getInfo
<PPCFunctionInfo
>()->setHasFastCall();
6340 // Count how many bytes are to be pushed on the stack, including the linkage
6341 // area, and parameter passing area. We start with 24/48 bytes, which is
6342 // prereserved space for [SP][CR][LR][3 x unused].
6343 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
6344 unsigned NumBytes
= LinkageSize
;
6346 // Add up all the space actually used.
6347 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
6348 // they all go in registers, but we must reserve stack space for them for
6349 // possible use by the caller. In varargs or 64-bit calls, parameters are
6350 // assigned stack space in order, with padding so Altivec parameters are
6352 unsigned nAltivecParamsAtEnd
= 0;
6353 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6354 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6355 EVT ArgVT
= Outs
[i
].VT
;
6356 // Varargs Altivec parameters are padded to a 16 byte boundary.
6357 if (ArgVT
== MVT::v4f32
|| ArgVT
== MVT::v4i32
||
6358 ArgVT
== MVT::v8i16
|| ArgVT
== MVT::v16i8
||
6359 ArgVT
== MVT::v2f64
|| ArgVT
== MVT::v2i64
) {
6360 if (!isVarArg
&& !isPPC64
) {
6361 // Non-varargs Altivec parameters go after all the non-Altivec
6362 // parameters; handle those later so we know how much padding we need.
6363 nAltivecParamsAtEnd
++;
6366 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
6367 NumBytes
= ((NumBytes
+15)/16)*16;
6369 NumBytes
+= CalculateStackSlotSize(ArgVT
, Flags
, PtrByteSize
);
6372 // Allow for Altivec parameters at the end, if needed.
6373 if (nAltivecParamsAtEnd
) {
6374 NumBytes
= ((NumBytes
+15)/16)*16;
6375 NumBytes
+= 16*nAltivecParamsAtEnd
;
6378 // The prolog code of the callee may store up to 8 GPR argument registers to
6379 // the stack, allowing va_start to index over them in memory if it's varargs.
6380 // Because we cannot tell if this is needed on the caller side, we have to
6381 // conservatively assume that it is needed. As such, make sure we have at
6382 // least enough stack space for the caller to store the 8 GPRs.
6383 NumBytes
= std::max(NumBytes
, LinkageSize
+ 8 * PtrByteSize
);
6385 // Tail call needs the stack to be aligned.
6386 if (getTargetMachine().Options
.GuaranteedTailCallOpt
&&
6387 CallConv
== CallingConv::Fast
)
6388 NumBytes
= EnsureStackAlignment(Subtarget
.getFrameLowering(), NumBytes
);
6390 // Calculate by how many bytes the stack has to be adjusted in case of tail
6391 // call optimization.
6392 int SPDiff
= CalculateTailCallSPDiff(DAG
, isTailCall
, NumBytes
);
6394 // To protect arguments on the stack from being clobbered in a tail call,
6395 // force all the loads to happen before doing any other lowering.
6397 Chain
= DAG
.getStackArgumentTokenFactor(Chain
);
6399 // Adjust the stack pointer for the new arguments...
6400 // These operations are automatically eliminated by the prolog/epilog pass
6401 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
6402 SDValue CallSeqStart
= Chain
;
6404 // Load the return address and frame pointer so it can be moved somewhere else
6407 Chain
= EmitTailCallLoadFPAndRetAddr(DAG
, SPDiff
, Chain
, LROp
, FPOp
, dl
);
6409 // Set up a copy of the stack pointer for use loading and storing any
6410 // arguments that may not fit in the registers available for argument
// Two alternative StackPtr definitions follow (X1/i64 and R1/i32).
// NOTE(review): the condition selecting between them is not visible here;
// presumably isPPC64 -- confirm.
6414 StackPtr
= DAG
.getRegister(PPC::X1
, MVT::i64
);
6416 StackPtr
= DAG
.getRegister(PPC::R1
, MVT::i32
);
6418 // Figure out which arguments are going to go in registers, and which in
6419 // memory. Also, if this is a vararg function, floating point operations
6420 // must be stored to our stack, and loaded into integer regs as well, if
6421 // any integer regs are available for argument passing.
6422 unsigned ArgOffset
= LinkageSize
;
6423 unsigned GPR_idx
= 0, FPR_idx
= 0, VR_idx
= 0;
6425 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
6426 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
6427 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
,
6429 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
6430 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
6431 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
,
6433 static const MCPhysReg VR
[] = {
6434 PPC::V2
, PPC::V3
, PPC::V4
, PPC::V5
, PPC::V6
, PPC::V7
, PPC::V8
,
6435 PPC::V9
, PPC::V10
, PPC::V11
, PPC::V12
, PPC::V13
// NumGPRs is taken from GPR_32; both GPR tables above list eight registers.
6437 const unsigned NumGPRs
= array_lengthof(GPR_32
);
6438 const unsigned NumFPRs
= 13;
6439 const unsigned NumVRs
= array_lengthof(VR
);
// Select the GPR table that matches the pointer width.
6441 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
6443 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
6444 SmallVector
<TailCallArgumentInfo
, 8> TailCallArguments
;
6446 SmallVector
<SDValue
, 8> MemOpChains
;
6447 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6448 SDValue Arg
= OutVals
[i
];
6449 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6451 // PtrOff will be used to store the current argument to the stack if a
6452 // register cannot be found for it.
6455 PtrOff
= DAG
.getConstant(ArgOffset
, dl
, StackPtr
.getValueType());
6457 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
, PtrOff
);
6459 // On PPC64, promote integers to 64-bit values.
6460 if (isPPC64
&& Arg
.getValueType() == MVT::i32
) {
6461 // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6462 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
6463 Arg
= DAG
.getNode(ExtOp
, dl
, MVT::i64
, Arg
);
6466 // FIXME memcpy is used way more than necessary. Correctness first.
6467 // Note: "by value" is code for passing a structure by value, not
6469 if (Flags
.isByVal()) {
6470 unsigned Size
= Flags
.getByValSize();
6471 // Very small objects are passed right-justified. Everything else is
6472 // passed left-justified.
6473 if (Size
==1 || Size
==2) {
6474 EVT VT
= (Size
==1) ? MVT::i8
: MVT::i16
;
6475 if (GPR_idx
!= NumGPRs
) {
6476 SDValue Load
= DAG
.getExtLoad(ISD::EXTLOAD
, dl
, PtrVT
, Chain
, Arg
,
6477 MachinePointerInfo(), VT
);
6478 MemOpChains
.push_back(Load
.getValue(1));
6479 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6481 ArgOffset
+= PtrByteSize
;
// No GPR left: copy the object to the end of its stack slot, i.e. at
// PtrOff + (PtrByteSize - Size).
6483 SDValue Const
= DAG
.getConstant(PtrByteSize
- Size
, dl
,
6484 PtrOff
.getValueType());
6485 SDValue AddPtr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, Const
);
6486 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, AddPtr
,
6489 ArgOffset
+= PtrByteSize
;
6493 // Copy entire object into memory. There are cases where gcc-generated
6494 // code assumes it is there, even if it could be put entirely into
6495 // registers. (This is not what the doc says.)
6496 Chain
= CallSeqStart
= createMemcpyOutsideCallSeq(Arg
, PtrOff
,
6500 // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
6501 // copy the pieces of the object that fit into registers from the
6502 // parameter save area.
6503 for (unsigned j
=0; j
<Size
; j
+=PtrByteSize
) {
6504 SDValue Const
= DAG
.getConstant(j
, dl
, PtrOff
.getValueType());
6505 SDValue AddArg
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, Arg
, Const
);
6506 if (GPR_idx
!= NumGPRs
) {
6508 DAG
.getLoad(PtrVT
, dl
, Chain
, AddArg
, MachinePointerInfo());
6509 MemOpChains
.push_back(Load
.getValue(1));
6510 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6511 ArgOffset
+= PtrByteSize
;
// Account for the remaining (Size - j) bytes, rounded up to whole
// PtrByteSize slots.
6513 ArgOffset
+= ((Size
- j
+ PtrByteSize
-1)/PtrByteSize
)*PtrByteSize
;
// Dispatch on the argument's value type.
// NOTE(review): the case labels are not visible in this listing.
6520 switch (Arg
.getSimpleValueType().SimpleTy
) {
6521 default: llvm_unreachable("Unexpected ValueType for argument!");
6525 if (GPR_idx
!= NumGPRs
) {
6526 if (Arg
.getValueType() == MVT::i1
)
6527 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, PtrVT
, Arg
);
6529 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6531 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6532 isPPC64
, isTailCall
, false, MemOpChains
,
6533 TailCallArguments
, dl
);
6535 ArgOffset
+= PtrByteSize
;
6539 if (FPR_idx
!= NumFPRs
) {
6540 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6544 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6545 MemOpChains
.push_back(Store
);
6547 // Float varargs are always shadowed in available integer registers
6548 if (GPR_idx
!= NumGPRs
) {
6550 DAG
.getLoad(PtrVT
, dl
, Store
, PtrOff
, MachinePointerInfo());
6551 MemOpChains
.push_back(Load
.getValue(1));
6552 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
// In 32-bit mode an f64 spans two GPRs: load the second word from
// PtrOff + 4 into the next GPR if one is still free.
6554 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&& !isPPC64
){
6555 SDValue ConstFour
= DAG
.getConstant(4, dl
, PtrOff
.getValueType());
6556 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
, ConstFour
);
6558 DAG
.getLoad(PtrVT
, dl
, Store
, PtrOff
, MachinePointerInfo());
6559 MemOpChains
.push_back(Load
.getValue(1));
6560 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6563 // If we have any FPRs remaining, we may also have GPRs remaining.
6564 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
6566 if (GPR_idx
!= NumGPRs
)
6568 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&&
6569 !isPPC64
) // PPC64 has 64-bit GPR's obviously :)
6573 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6574 isPPC64
, isTailCall
, false, MemOpChains
,
6575 TailCallArguments
, dl
);
6579 ArgOffset
+= Arg
.getValueType() == MVT::f32
? 4 : 8;
6586 // These go aligned on the stack, or in the corresponding R registers
6587 // when within range. The Darwin PPC ABI doc claims they also go in
6588 // V registers; in fact gcc does this only for arguments that are
6589 // prototyped, not for those that match the ... We do it for all
6590 // arguments, seems to work.
// Step ArgOffset forward in PtrByteSize increments until it is a multiple
// of 16.
6591 while (ArgOffset
% 16 !=0) {
6592 ArgOffset
+= PtrByteSize
;
6593 if (GPR_idx
!= NumGPRs
)
6596 // We could elide this store in the case where the object fits
6597 // entirely in R registers. Maybe later.
6598 PtrOff
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackPtr
,
6599 DAG
.getConstant(ArgOffset
, dl
, PtrVT
));
6601 DAG
.getStore(Chain
, dl
, Arg
, PtrOff
, MachinePointerInfo());
6602 MemOpChains
.push_back(Store
);
6603 if (VR_idx
!= NumVRs
) {
6605 DAG
.getLoad(MVT::v4f32
, dl
, Store
, PtrOff
, MachinePointerInfo());
6606 MemOpChains
.push_back(Load
.getValue(1));
6607 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Load
));
// Reload the stored 16-byte vector one pointer-sized piece at a time into
// GPRs.
6610 for (unsigned i
=0; i
<16; i
+=PtrByteSize
) {
6611 if (GPR_idx
== NumGPRs
)
6613 SDValue Ix
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, PtrOff
,
6614 DAG
.getConstant(i
, dl
, PtrVT
));
6616 DAG
.getLoad(PtrVT
, dl
, Store
, Ix
, MachinePointerInfo());
6617 MemOpChains
.push_back(Load
.getValue(1));
6618 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Load
));
6623 // Non-varargs Altivec params generally go in registers, but have
6624 // stack space allocated at the end.
6625 if (VR_idx
!= NumVRs
) {
6626 // Doesn't have GPR space allocated.
6627 RegsToPass
.push_back(std::make_pair(VR
[VR_idx
++], Arg
));
6628 } else if (nAltivecParamsAtEnd
==0) {
6629 // We are emitting Altivec params in order.
6630 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6631 isPPC64
, isTailCall
, true, MemOpChains
,
6632 TailCallArguments
, dl
);
6638 // If all Altivec parameters fit in registers, as they usually do,
6639 // they get stack space following the non-Altivec parameters. We
6640 // don't track this here because nobody below needs it.
6641 // If there are more Altivec parameters than fit in registers emit
6643 if (!isVarArg
&& nAltivecParamsAtEnd
> NumVRs
) {
6645 // Offset is aligned; skip 1st 12 params which go in V registers.
6646 ArgOffset
= ((ArgOffset
+15)/16)*16;
6648 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6649 SDValue Arg
= OutVals
[i
];
6650 EVT ArgType
= Outs
[i
].VT
;
// NOTE(review): the sizing loop near the top of this function also counts
// v2f64 and v2i64 as Altivec parameters, while this test covers only the
// four types below -- confirm whether the difference is intended.
6651 if (ArgType
==MVT::v4f32
|| ArgType
==MVT::v4i32
||
6652 ArgType
==MVT::v8i16
|| ArgType
==MVT::v16i8
) {
6655 // We are emitting Altivec params in order.
6656 LowerMemOpCallTo(DAG
, MF
, Chain
, Arg
, PtrOff
, SPDiff
, ArgOffset
,
6657 isPPC64
, isTailCall
, true, MemOpChains
,
6658 TailCallArguments
, dl
);
// Join every queued load/store chain into one token before the register copies.
6665 if (!MemOpChains
.empty())
6666 Chain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, MemOpChains
);
6668 // On Darwin, R12 must contain the address of an indirect callee. This does
6669 // not mean the MTCTR instruction must use R12; it's easier to model this as
6670 // an extra parameter, so do that.
6672 !isFunctionGlobalAddress(Callee
) &&
6673 !isa
<ExternalSymbolSDNode
>(Callee
) &&
6674 !isBLACompatibleAddress(Callee
, DAG
))
6675 RegsToPass
.push_back(std::make_pair((unsigned)(isPPC64
? PPC::X12
:
6676 PPC::R12
), Callee
));
6678 // Build a sequence of copy-to-reg nodes chained together with token chain
6679 // and flag operands which copy the outgoing args into the appropriate regs.
6681 for (unsigned i
= 0, e
= RegsToPass
.size(); i
!= e
; ++i
) {
6682 Chain
= DAG
.getCopyToReg(Chain
, dl
, RegsToPass
[i
].first
,
6683 RegsToPass
[i
].second
, InFlag
);
6684 InFlag
= Chain
.getValue(1);
6688 PrepareTailCall(DAG
, InFlag
, Chain
, dl
, SPDiff
, NumBytes
, LROp
, FPOp
,
6691 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
6692 /* unused except on PPC64 ELFv1 */ false, DAG
,
6693 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
6694 NumBytes
, Ins
, InVals
, CS
);
/// Lower an outgoing call for AIX. Only a restricted subset is implemented;
/// everything else ends in report_fatal_error().
///
/// Visible behaviour:
///   - Asserts that CallConv is CallingConv::C or CallingConv::Fast.
///   - Fatal error for variadic or patchpoint calls, for byval arguments, for
///     arguments that would need a stack slot, and for callees that are
///     neither a global address nor an external symbol. A fatal error for
///     tail calls is also present; its guarding condition is not visible in
///     this listing.
///   - Reserves LinkageSize + 8 * PtrByteSize bytes via getCALLSEQ_START.
///   - Sign- or zero-extends i1 (and, in 64-bit mode, i32) arguments to the
///     pointer type, then assigns arguments to GPRs or FPRs in RegsToPass.
///   - Emits glued CopyToReg nodes and returns the result of FinishCall().
///
/// \param Chain        Incoming token chain.
/// \param Callee       Call target; must be a direct callee (see above).
/// \param CallConv     Calling convention (C or Fast).
/// \param isVarArg     Must be false, otherwise a fatal error is reported.
/// \param isTailCall   Forwarded to FinishCall().
/// \param isPatchPoint Must be false, otherwise a fatal error is reported.
/// \param Outs         Per-argument flags.
/// \param OutVals      Per-argument values, parallel to Outs.
/// \param Ins, InVals, CS  Forwarded to FinishCall().
/// \param dl           Debug location used for every node created here.
/// \param DAG          SelectionDAG the nodes are created in.
/// \returns The SDValue produced by FinishCall().
///
/// NOTE(review): FPR, SPDiff and InFlag are used below without a visible
/// declaration in this listing; confirm against the full file.
6698 SDValue
PPCTargetLowering::LowerCall_AIX(
6699 SDValue Chain
, SDValue Callee
, CallingConv::ID CallConv
, bool isVarArg
,
6700 bool isTailCall
, bool isPatchPoint
,
6701 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6702 const SmallVectorImpl
<SDValue
> &OutVals
,
6703 const SmallVectorImpl
<ISD::InputArg
> &Ins
, const SDLoc
&dl
,
6704 SelectionDAG
&DAG
, SmallVectorImpl
<SDValue
> &InVals
,
6705 ImmutableCallSite CS
) const {
6707 assert((CallConv
== CallingConv::C
|| CallConv
== CallingConv::Fast
) &&
6708 "Unimplemented calling convention!");
6709 if (isVarArg
|| isPatchPoint
)
6710 report_fatal_error("This call type is unimplemented on AIX.");
// Pointer width decides 32- vs 64-bit lowering for the rest of the function.
6712 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6713 bool isPPC64
= PtrVT
== MVT::i64
;
6714 unsigned PtrByteSize
= isPPC64
? 8 : 4;
6715 unsigned NumOps
= Outs
.size();
6718 // Count how many bytes are to be pushed on the stack, including the linkage
6719 // area, parameter list area.
6720 // On XCOFF, we start with 24/48, which is reserved space for
6721 // [SP][CR][LR][2 x reserved][TOC].
6722 unsigned LinkageSize
= Subtarget
.getFrameLowering()->getLinkageSize();
6724 // The prolog code of the callee may store up to 8 GPR argument registers to
6725 // the stack, allowing va_start to index over them in memory if the callee
6727 // Because we cannot tell if this is needed on the caller side, we have to
6728 // conservatively assume that it is needed. As such, make sure we have at
6729 // least enough stack space for the caller to store the 8 GPRs.
6730 unsigned NumBytes
= LinkageSize
+ 8 * PtrByteSize
;
6732 // Adjust the stack pointer for the new arguments...
6733 // These operations are automatically eliminated by the prolog/epilog
6735 Chain
= DAG
.getCALLSEQ_START(Chain
, NumBytes
, 0, dl
);
6736 SDValue CallSeqStart
= Chain
;
6738 static const MCPhysReg GPR_32
[] = { // 32-bit registers.
6739 PPC::R3
, PPC::R4
, PPC::R5
, PPC::R6
,
6740 PPC::R7
, PPC::R8
, PPC::R9
, PPC::R10
6742 static const MCPhysReg GPR_64
[] = { // 64-bit registers.
6743 PPC::X3
, PPC::X4
, PPC::X5
, PPC::X6
,
6744 PPC::X7
, PPC::X8
, PPC::X9
, PPC::X10
6747 const unsigned NumGPRs
= isPPC64
? array_lengthof(GPR_64
)
6748 : array_lengthof(GPR_32
);
6749 const unsigned NumFPRs
= array_lengthof(FPR
);
6750 assert(NumFPRs
== 13 && "Only FPR 1-13 could be used for parameter passing "
// Select the GPR table that matches the pointer width.
6753 const MCPhysReg
*GPR
= isPPC64
? GPR_64
: GPR_32
;
6754 unsigned GPR_idx
= 0, FPR_idx
= 0;
6756 SmallVector
<std::pair
<unsigned, SDValue
>, 8> RegsToPass
;
6759 report_fatal_error("Handling of tail call is unimplemented!");
6762 for (unsigned i
= 0; i
!= NumOps
; ++i
) {
6763 SDValue Arg
= OutVals
[i
];
6764 ISD::ArgFlagsTy Flags
= Outs
[i
].Flags
;
6766 // Promote integers if needed.
6767 if (Arg
.getValueType() == MVT::i1
||
6768 (isPPC64
&& Arg
.getValueType() == MVT::i32
)) {
6769 unsigned ExtOp
= Flags
.isSExt() ? ISD::SIGN_EXTEND
: ISD::ZERO_EXTEND
;
6770 Arg
= DAG
.getNode(ExtOp
, dl
, PtrVT
, Arg
);
6773 // Note: "by value" is code for passing a structure by value, not
6775 if (Flags
.isByVal())
6776 report_fatal_error("Passing structure by value is unimplemented!");
// Dispatch on the argument's value type.
// NOTE(review): the case labels are not visible in this listing.
6778 switch (Arg
.getSimpleValueType().SimpleTy
) {
6779 default: llvm_unreachable("Unexpected ValueType for argument!");
6783 if (GPR_idx
!= NumGPRs
)
6784 RegsToPass
.push_back(std::make_pair(GPR
[GPR_idx
++], Arg
));
6786 report_fatal_error("Handling of placing parameters on the stack is "
6791 if (FPR_idx
!= NumFPRs
) {
6792 RegsToPass
.push_back(std::make_pair(FPR
[FPR_idx
++], Arg
));
6794 // If we have any FPRs remaining, we may also have GPRs remaining.
6795 // Args passed in FPRs consume 1 or 2 (f64 in 32 bit mode) available
6797 if (GPR_idx
!= NumGPRs
)
6799 if (GPR_idx
!= NumGPRs
&& Arg
.getValueType() == MVT::f64
&& !isPPC64
)
6802 report_fatal_error("Handling of placing parameters on the stack is "
6815 report_fatal_error("Handling of this parameter type is unimplemented!");
// Only direct calls (global address or external symbol callee) are supported.
6819 if (!isFunctionGlobalAddress(Callee
) &&
6820 !isa
<ExternalSymbolSDNode
>(Callee
))
6821 report_fatal_error("Handling of indirect call is unimplemented!");
6823 // Build a sequence of copy-to-reg nodes chained together with token chain
6824 // and flag operands which copy the outgoing args into the appropriate regs.
6826 for (auto Reg
: RegsToPass
) {
6827 Chain
= DAG
.getCopyToReg(Chain
, dl
, Reg
.first
, Reg
.second
, InFlag
);
6828 InFlag
= Chain
.getValue(1);
6831 return FinishCall(CallConv
, dl
, isTailCall
, isVarArg
, isPatchPoint
,
6832 /* unused except on PPC64 ELFv1 */ false, DAG
,
6833 RegsToPass
, InFlag
, Chain
, CallSeqStart
, Callee
, SPDiff
,
6834 NumBytes
, Ins
, InVals
, CS
);
6838 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv
,
6839 MachineFunction
&MF
, bool isVarArg
,
6840 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6841 LLVMContext
&Context
) const {
6842 SmallVector
<CCValAssign
, 16> RVLocs
;
6843 CCState
CCInfo(CallConv
, isVarArg
, MF
, RVLocs
, Context
);
6844 return CCInfo
.CheckReturn(
6845 Outs
, (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
6851 PPCTargetLowering::LowerReturn(SDValue Chain
, CallingConv::ID CallConv
,
6853 const SmallVectorImpl
<ISD::OutputArg
> &Outs
,
6854 const SmallVectorImpl
<SDValue
> &OutVals
,
6855 const SDLoc
&dl
, SelectionDAG
&DAG
) const {
6856 SmallVector
<CCValAssign
, 16> RVLocs
;
6857 CCState
CCInfo(CallConv
, isVarArg
, DAG
.getMachineFunction(), RVLocs
,
6859 CCInfo
.AnalyzeReturn(Outs
,
6860 (Subtarget
.isSVR4ABI() && CallConv
== CallingConv::Cold
)
6865 SmallVector
<SDValue
, 4> RetOps(1, Chain
);
6867 // Copy the result values into the output registers.
6868 for (unsigned i
= 0, RealResIdx
= 0; i
!= RVLocs
.size(); ++i
, ++RealResIdx
) {
6869 CCValAssign
&VA
= RVLocs
[i
];
6870 assert(VA
.isRegLoc() && "Can only return in registers!");
6872 SDValue Arg
= OutVals
[RealResIdx
];
6874 switch (VA
.getLocInfo()) {
6875 default: llvm_unreachable("Unknown loc info!");
6876 case CCValAssign::Full
: break;
6877 case CCValAssign::AExt
:
6878 Arg
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6880 case CCValAssign::ZExt
:
6881 Arg
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6883 case CCValAssign::SExt
:
6884 Arg
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, VA
.getLocVT(), Arg
);
6887 if (Subtarget
.hasSPE() && VA
.getLocVT() == MVT::f64
) {
6888 bool isLittleEndian
= Subtarget
.isLittleEndian();
6889 // Legalize ret f64 -> ret 2 x i32.
6891 DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
6892 DAG
.getIntPtrConstant(isLittleEndian
? 0 : 1, dl
));
6893 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Flag
);
6894 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
6895 SVal
= DAG
.getNode(PPCISD::EXTRACT_SPE
, dl
, MVT::i32
, Arg
,
6896 DAG
.getIntPtrConstant(isLittleEndian
? 1 : 0, dl
));
6897 Flag
= Chain
.getValue(1);
6898 VA
= RVLocs
[++i
]; // skip ahead to next loc
6899 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), SVal
, Flag
);
6901 Chain
= DAG
.getCopyToReg(Chain
, dl
, VA
.getLocReg(), Arg
, Flag
);
6902 Flag
= Chain
.getValue(1);
6903 RetOps
.push_back(DAG
.getRegister(VA
.getLocReg(), VA
.getLocVT()));
6906 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
6907 const MCPhysReg
*I
=
6908 TRI
->getCalleeSavedRegsViaCopy(&DAG
.getMachineFunction());
6912 if (PPC::G8RCRegClass
.contains(*I
))
6913 RetOps
.push_back(DAG
.getRegister(*I
, MVT::i64
));
6914 else if (PPC::F8RCRegClass
.contains(*I
))
6915 RetOps
.push_back(DAG
.getRegister(*I
, MVT::getFloatingPointVT(64)));
6916 else if (PPC::CRRCRegClass
.contains(*I
))
6917 RetOps
.push_back(DAG
.getRegister(*I
, MVT::i1
));
6918 else if (PPC::VRRCRegClass
.contains(*I
))
6919 RetOps
.push_back(DAG
.getRegister(*I
, MVT::Other
));
6921 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
6925 RetOps
[0] = Chain
; // Update chain.
6927 // Add the flag if we have it.
6929 RetOps
.push_back(Flag
);
6931 return DAG
.getNode(PPCISD::RET_FLAG
, dl
, MVT::Other
, RetOps
);
6935 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op
,
6936 SelectionDAG
&DAG
) const {
6939 // Get the correct type for integers.
6940 EVT IntVT
= Op
.getValueType();
6943 SDValue Chain
= Op
.getOperand(0);
6944 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
6945 // Build a DYNAREAOFFSET node.
6946 SDValue Ops
[2] = {Chain
, FPSIdx
};
6947 SDVTList VTs
= DAG
.getVTList(IntVT
);
6948 return DAG
.getNode(PPCISD::DYNAREAOFFSET
, dl
, VTs
, Ops
);
6951 SDValue
PPCTargetLowering::LowerSTACKRESTORE(SDValue Op
,
6952 SelectionDAG
&DAG
) const {
6953 // When we pop the dynamic allocation we need to restore the SP link.
6956 // Get the correct type for pointers.
6957 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
6959 // Construct the stack pointer operand.
6960 bool isPPC64
= Subtarget
.isPPC64();
6961 unsigned SP
= isPPC64
? PPC::X1
: PPC::R1
;
6962 SDValue StackPtr
= DAG
.getRegister(SP
, PtrVT
);
6964 // Get the operands for the STACKRESTORE.
6965 SDValue Chain
= Op
.getOperand(0);
6966 SDValue SaveSP
= Op
.getOperand(1);
6968 // Load the old link SP.
6969 SDValue LoadLinkSP
=
6970 DAG
.getLoad(PtrVT
, dl
, Chain
, StackPtr
, MachinePointerInfo());
6972 // Restore the stack pointer.
6973 Chain
= DAG
.getCopyToReg(LoadLinkSP
.getValue(1), dl
, SP
, SaveSP
);
6975 // Store the old link SP.
6976 return DAG
.getStore(Chain
, dl
, LoadLinkSP
, StackPtr
, MachinePointerInfo());
6979 SDValue
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG
&DAG
) const {
6980 MachineFunction
&MF
= DAG
.getMachineFunction();
6981 bool isPPC64
= Subtarget
.isPPC64();
6982 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
6984 // Get current frame pointer save index. The users of this index will be
6985 // primarily DYNALLOC instructions.
6986 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
6987 int RASI
= FI
->getReturnAddrSaveIndex();
6989 // If the frame pointer save index hasn't been defined yet.
6991 // Find out what the fix offset of the frame pointer save area.
6992 int LROffset
= Subtarget
.getFrameLowering()->getReturnSaveOffset();
6993 // Allocate the frame index for frame pointer save area.
6994 RASI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, LROffset
, false);
6996 FI
->setReturnAddrSaveIndex(RASI
);
6998 return DAG
.getFrameIndex(RASI
, PtrVT
);
7002 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG
& DAG
) const {
7003 MachineFunction
&MF
= DAG
.getMachineFunction();
7004 bool isPPC64
= Subtarget
.isPPC64();
7005 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7007 // Get current frame pointer save index. The users of this index will be
7008 // primarily DYNALLOC instructions.
7009 PPCFunctionInfo
*FI
= MF
.getInfo
<PPCFunctionInfo
>();
7010 int FPSI
= FI
->getFramePointerSaveIndex();
7012 // If the frame pointer save index hasn't been defined yet.
7014 // Find out what the fix offset of the frame pointer save area.
7015 int FPOffset
= Subtarget
.getFrameLowering()->getFramePointerSaveOffset();
7016 // Allocate the frame index for frame pointer save area.
7017 FPSI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, FPOffset
, true);
7019 FI
->setFramePointerSaveIndex(FPSI
);
7021 return DAG
.getFrameIndex(FPSI
, PtrVT
);
7024 SDValue
PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op
,
7025 SelectionDAG
&DAG
) const {
7027 SDValue Chain
= Op
.getOperand(0);
7028 SDValue Size
= Op
.getOperand(1);
7031 // Get the correct type for pointers.
7032 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7034 SDValue NegSize
= DAG
.getNode(ISD::SUB
, dl
, PtrVT
,
7035 DAG
.getConstant(0, dl
, PtrVT
), Size
);
7036 // Construct a node for the frame pointer save index.
7037 SDValue FPSIdx
= getFramePointerFrameIndex(DAG
);
7038 // Build a DYNALLOC node.
7039 SDValue Ops
[3] = { Chain
, NegSize
, FPSIdx
};
7040 SDVTList VTs
= DAG
.getVTList(PtrVT
, MVT::Other
);
7041 return DAG
.getNode(PPCISD::DYNALLOC
, dl
, VTs
, Ops
);
7044 SDValue
PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op
,
7045 SelectionDAG
&DAG
) const {
7046 MachineFunction
&MF
= DAG
.getMachineFunction();
7048 bool isPPC64
= Subtarget
.isPPC64();
7049 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
7051 int FI
= MF
.getFrameInfo().CreateFixedObject(isPPC64
? 8 : 4, 0, false);
7052 return DAG
.getFrameIndex(FI
, PtrVT
);
7055 SDValue
PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op
,
7056 SelectionDAG
&DAG
) const {
7058 return DAG
.getNode(PPCISD::EH_SJLJ_SETJMP
, DL
,
7059 DAG
.getVTList(MVT::i32
, MVT::Other
),
7060 Op
.getOperand(0), Op
.getOperand(1));
7063 SDValue
PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op
,
7064 SelectionDAG
&DAG
) const {
7066 return DAG
.getNode(PPCISD::EH_SJLJ_LONGJMP
, DL
, MVT::Other
,
7067 Op
.getOperand(0), Op
.getOperand(1));
7070 SDValue
PPCTargetLowering::LowerLOAD(SDValue Op
, SelectionDAG
&DAG
) const {
7071 if (Op
.getValueType().isVector())
7072 return LowerVectorLoad(Op
, DAG
);
7074 assert(Op
.getValueType() == MVT::i1
&&
7075 "Custom lowering only for i1 loads");
7077 // First, load 8 bits into 32 bits, then truncate to 1 bit.
7080 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op
);
7082 SDValue Chain
= LD
->getChain();
7083 SDValue BasePtr
= LD
->getBasePtr();
7084 MachineMemOperand
*MMO
= LD
->getMemOperand();
7087 DAG
.getExtLoad(ISD::EXTLOAD
, dl
, getPointerTy(DAG
.getDataLayout()), Chain
,
7088 BasePtr
, MVT::i8
, MMO
);
7089 SDValue Result
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, NewLD
);
7091 SDValue Ops
[] = { Result
, SDValue(NewLD
.getNode(), 1) };
7092 return DAG
.getMergeValues(Ops
, dl
);
7095 SDValue
PPCTargetLowering::LowerSTORE(SDValue Op
, SelectionDAG
&DAG
) const {
7096 if (Op
.getOperand(1).getValueType().isVector())
7097 return LowerVectorStore(Op
, DAG
);
7099 assert(Op
.getOperand(1).getValueType() == MVT::i1
&&
7100 "Custom lowering only for i1 stores");
7102 // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7105 StoreSDNode
*ST
= cast
<StoreSDNode
>(Op
);
7107 SDValue Chain
= ST
->getChain();
7108 SDValue BasePtr
= ST
->getBasePtr();
7109 SDValue Value
= ST
->getValue();
7110 MachineMemOperand
*MMO
= ST
->getMemOperand();
7112 Value
= DAG
.getNode(ISD::ZERO_EXTEND
, dl
, getPointerTy(DAG
.getDataLayout()),
7114 return DAG
.getTruncStore(Chain
, dl
, Value
, BasePtr
, MVT::i8
, MMO
);
7117 // FIXME: Remove this once the ANDI glue bug is fixed:
7118 SDValue
PPCTargetLowering::LowerTRUNCATE(SDValue Op
, SelectionDAG
&DAG
) const {
7119 assert(Op
.getValueType() == MVT::i1
&&
7120 "Custom lowering only for i1 results");
7123 return DAG
.getNode(PPCISD::ANDIo_1_GT_BIT
, DL
, MVT::i1
,
7127 SDValue
PPCTargetLowering::LowerTRUNCATEVector(SDValue Op
,
7128 SelectionDAG
&DAG
) const {
7130 // Implements a vector truncate that fits in a vector register as a shuffle.
7131 // We want to legalize vector truncates down to where the source fits in
7132 // a vector register (and target is therefore smaller than vector register
7133 // size). At that point legalization will try to custom lower the sub-legal
7134 // result and get here - where we can contain the truncate as a single target
7137 // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7138 // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7140 // We will implement it for big-endian ordering as this (where x denotes
7142 // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7143 // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7145 // The same operation in little-endian ordering will be:
7146 // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7147 // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7149 assert(Op
.getValueType().isVector() && "Vector type expected.");
7152 SDValue N1
= Op
.getOperand(0);
7153 unsigned SrcSize
= N1
.getValueType().getSizeInBits();
7154 assert(SrcSize
<= 128 && "Source must fit in an Altivec/VSX vector");
7155 SDValue WideSrc
= SrcSize
== 128 ? N1
: widenVec(DAG
, N1
, DL
);
7157 EVT TrgVT
= Op
.getValueType();
7158 unsigned TrgNumElts
= TrgVT
.getVectorNumElements();
7159 EVT EltVT
= TrgVT
.getVectorElementType();
7160 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
7161 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
7163 // First list the elements we want to keep.
7164 unsigned SizeMult
= SrcSize
/ TrgVT
.getSizeInBits();
7165 SmallVector
<int, 16> ShuffV
;
7166 if (Subtarget
.isLittleEndian())
7167 for (unsigned i
= 0; i
< TrgNumElts
; ++i
)
7168 ShuffV
.push_back(i
* SizeMult
);
7170 for (unsigned i
= 1; i
<= TrgNumElts
; ++i
)
7171 ShuffV
.push_back(i
* SizeMult
- 1);
7173 // Populate the remaining elements with undefs.
7174 for (unsigned i
= TrgNumElts
; i
< WideNumElts
; ++i
)
7175 // ShuffV.push_back(i + WideNumElts);
7176 ShuffV
.push_back(WideNumElts
+ 1);
7178 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, DL
, WideVT
, WideSrc
);
7179 return DAG
.getVectorShuffle(WideVT
, DL
, Conv
, DAG
.getUNDEF(WideVT
), ShuffV
);
7182 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7184 SDValue
PPCTargetLowering::LowerSELECT_CC(SDValue Op
, SelectionDAG
&DAG
) const {
7185 // Not FP? Not a fsel.
7186 if (!Op
.getOperand(0).getValueType().isFloatingPoint() ||
7187 !Op
.getOperand(2).getValueType().isFloatingPoint())
7190 // We might be able to do better than this under some circumstances, but in
7191 // general, fsel-based lowering of select is a finite-math-only optimization.
7192 // For more information, see section F.3 of the 2.06 ISA specification.
7193 if (!DAG
.getTarget().Options
.NoInfsFPMath
||
7194 !DAG
.getTarget().Options
.NoNaNsFPMath
)
7196 // TODO: Propagate flags from the select rather than global settings.
7198 Flags
.setNoInfs(true);
7199 Flags
.setNoNaNs(true);
7201 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Op
.getOperand(4))->get();
7203 EVT ResVT
= Op
.getValueType();
7204 EVT CmpVT
= Op
.getOperand(0).getValueType();
7205 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
7206 SDValue TV
= Op
.getOperand(2), FV
= Op
.getOperand(3);
7209 // If the RHS of the comparison is a 0.0, we don't need to do the
7210 // subtraction at all.
7212 if (isFloatingPointZero(RHS
))
7214 default: break; // SETUO etc aren't handled by fsel.
7219 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7220 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7221 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
7222 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7223 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
7224 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7225 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), Sel1
, FV
);
7228 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
7232 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7233 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7234 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, LHS
, TV
, FV
);
7237 std::swap(TV
, FV
); // fsel is natively setge, swap operands for setlt
7241 if (LHS
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7242 LHS
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, LHS
);
7243 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7244 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, LHS
), TV
, FV
);
7249 default: break; // SETUO etc aren't handled by fsel.
7254 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7255 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7256 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7257 Sel1
= DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
7258 if (Sel1
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7259 Sel1
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Sel1
);
7260 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
,
7261 DAG
.getNode(ISD::FNEG
, dl
, MVT::f64
, Cmp
), Sel1
, FV
);
7264 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7265 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7266 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7267 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
7270 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, LHS
, RHS
, Flags
);
7271 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7272 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7273 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
7276 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
7277 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7278 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7279 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, FV
, TV
);
7282 Cmp
= DAG
.getNode(ISD::FSUB
, dl
, CmpVT
, RHS
, LHS
, Flags
);
7283 if (Cmp
.getValueType() == MVT::f32
) // Comparison is always 64-bits
7284 Cmp
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Cmp
);
7285 return DAG
.getNode(PPCISD::FSEL
, dl
, ResVT
, Cmp
, TV
, FV
);
7290 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op
, ReuseLoadInfo
&RLI
,
7292 const SDLoc
&dl
) const {
7293 assert(Op
.getOperand(0).getValueType().isFloatingPoint());
7294 SDValue Src
= Op
.getOperand(0);
7295 if (Src
.getValueType() == MVT::f32
)
7296 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
7299 switch (Op
.getSimpleValueType().SimpleTy
) {
7300 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7303 Op
.getOpcode() == ISD::FP_TO_SINT
7305 : (Subtarget
.hasFPCVT() ? PPCISD::FCTIWUZ
: PPCISD::FCTIDZ
),
7309 assert((Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT()) &&
7310 "i64 FP_TO_UINT is supported only with FPCVT");
7311 Tmp
= DAG
.getNode(Op
.getOpcode()==ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
7317 // Convert the FP value to an int value through memory.
7318 bool i32Stack
= Op
.getValueType() == MVT::i32
&& Subtarget
.hasSTFIWX() &&
7319 (Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT());
7320 SDValue FIPtr
= DAG
.CreateStackTemporary(i32Stack
? MVT::i32
: MVT::f64
);
7321 int FI
= cast
<FrameIndexSDNode
>(FIPtr
)->getIndex();
7322 MachinePointerInfo MPI
=
7323 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FI
);
7325 // Emit a store to the stack slot.
7328 MachineFunction
&MF
= DAG
.getMachineFunction();
7329 MachineMemOperand
*MMO
=
7330 MF
.getMachineMemOperand(MPI
, MachineMemOperand::MOStore
, 4, 4);
7331 SDValue Ops
[] = { DAG
.getEntryNode(), Tmp
, FIPtr
};
7332 Chain
= DAG
.getMemIntrinsicNode(PPCISD::STFIWX
, dl
,
7333 DAG
.getVTList(MVT::Other
), Ops
, MVT::i32
, MMO
);
7335 Chain
= DAG
.getStore(DAG
.getEntryNode(), dl
, Tmp
, FIPtr
, MPI
);
7337 // Result is a load from the stack slot. If loading 4 bytes, make sure to
7338 // add in a bias on big endian.
7339 if (Op
.getValueType() == MVT::i32
&& !i32Stack
) {
7340 FIPtr
= DAG
.getNode(ISD::ADD
, dl
, FIPtr
.getValueType(), FIPtr
,
7341 DAG
.getConstant(4, dl
, FIPtr
.getValueType()));
7342 MPI
= MPI
.getWithOffset(Subtarget
.isLittleEndian() ? 0 : 4);
7350 /// Custom lowers floating point to integer conversions to use
7351 /// the direct move instructions available in ISA 2.07 to avoid the
7352 /// need for load/store combinations.
7353 SDValue
PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op
,
7355 const SDLoc
&dl
) const {
7356 assert(Op
.getOperand(0).getValueType().isFloatingPoint());
7357 SDValue Src
= Op
.getOperand(0);
7359 if (Src
.getValueType() == MVT::f32
)
7360 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
7363 switch (Op
.getSimpleValueType().SimpleTy
) {
7364 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
7367 Op
.getOpcode() == ISD::FP_TO_SINT
7369 : (Subtarget
.hasFPCVT() ? PPCISD::FCTIWUZ
: PPCISD::FCTIDZ
),
7371 Tmp
= DAG
.getNode(PPCISD::MFVSR
, dl
, MVT::i32
, Tmp
);
7374 assert((Op
.getOpcode() == ISD::FP_TO_SINT
|| Subtarget
.hasFPCVT()) &&
7375 "i64 FP_TO_UINT is supported only with FPCVT");
7376 Tmp
= DAG
.getNode(Op
.getOpcode()==ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
7379 Tmp
= DAG
.getNode(PPCISD::MFVSR
, dl
, MVT::i64
, Tmp
);
7385 SDValue
PPCTargetLowering::LowerFP_TO_INT(SDValue Op
, SelectionDAG
&DAG
,
7386 const SDLoc
&dl
) const {
7388 // FP to INT conversions are legal for f128.
7389 if (EnableQuadPrecision
&& (Op
->getOperand(0).getValueType() == MVT::f128
))
7392 // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
7393 // PPC (the libcall is not available).
7394 if (Op
.getOperand(0).getValueType() == MVT::ppcf128
) {
7395 if (Op
.getValueType() == MVT::i32
) {
7396 if (Op
.getOpcode() == ISD::FP_TO_SINT
) {
7397 SDValue Lo
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
,
7398 MVT::f64
, Op
.getOperand(0),
7399 DAG
.getIntPtrConstant(0, dl
));
7400 SDValue Hi
= DAG
.getNode(ISD::EXTRACT_ELEMENT
, dl
,
7401 MVT::f64
, Op
.getOperand(0),
7402 DAG
.getIntPtrConstant(1, dl
));
7404 // Add the two halves of the long double in round-to-zero mode.
7405 SDValue Res
= DAG
.getNode(PPCISD::FADDRTZ
, dl
, MVT::f64
, Lo
, Hi
);
7407 // Now use a smaller FP_TO_SINT.
7408 return DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, Res
);
7410 if (Op
.getOpcode() == ISD::FP_TO_UINT
) {
7411 const uint64_t TwoE31
[] = {0x41e0000000000000LL
, 0};
7412 APFloat APF
= APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31
));
7413 SDValue Tmp
= DAG
.getConstantFP(APF
, dl
, MVT::ppcf128
);
7414 // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
7415 // FIXME: generated code sucks.
7416 // TODO: Are there fast-math-flags to propagate to this FSUB?
7417 SDValue True
= DAG
.getNode(ISD::FSUB
, dl
, MVT::ppcf128
,
7418 Op
.getOperand(0), Tmp
);
7419 True
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
, True
);
7420 True
= DAG
.getNode(ISD::ADD
, dl
, MVT::i32
, True
,
7421 DAG
.getConstant(0x80000000, dl
, MVT::i32
));
7422 SDValue False
= DAG
.getNode(ISD::FP_TO_SINT
, dl
, MVT::i32
,
7424 return DAG
.getSelectCC(dl
, Op
.getOperand(0), Tmp
, True
, False
,
7432 if (Subtarget
.hasDirectMove() && Subtarget
.isPPC64())
7433 return LowerFP_TO_INTDirectMove(Op
, DAG
, dl
);
7436 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
7438 return DAG
.getLoad(Op
.getValueType(), dl
, RLI
.Chain
, RLI
.Ptr
, RLI
.MPI
,
7439 RLI
.Alignment
, RLI
.MMOFlags(), RLI
.AAInfo
, RLI
.Ranges
);
7442 // We're trying to insert a regular store, S, and then a load, L. If the
7443 // incoming value, O, is a load, we might just be able to have our load use the
7444 // address used by O. However, we don't know if anything else will store to
7445 // that address before we can load from it. To prevent this situation, we need
7446 // to insert our load, L, into the chain as a peer of O. To do this, we give L
7447 // the same chain operand as O, we create a token factor from the chain results
7448 // of O and L, and we replace all uses of O's chain result with that token
7449 // factor (see spliceIntoChain below for this last part).
7450 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op
, EVT MemVT
,
7453 ISD::LoadExtType ET
) const {
7455 if (ET
== ISD::NON_EXTLOAD
&&
7456 (Op
.getOpcode() == ISD::FP_TO_UINT
||
7457 Op
.getOpcode() == ISD::FP_TO_SINT
) &&
7458 isOperationLegalOrCustom(Op
.getOpcode(),
7459 Op
.getOperand(0).getValueType())) {
7461 LowerFP_TO_INTForReuse(Op
, RLI
, DAG
, dl
);
7465 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Op
);
7466 if (!LD
|| LD
->getExtensionType() != ET
|| LD
->isVolatile() ||
7467 LD
->isNonTemporal())
7469 if (LD
->getMemoryVT() != MemVT
)
7472 RLI
.Ptr
= LD
->getBasePtr();
7473 if (LD
->isIndexed() && !LD
->getOffset().isUndef()) {
7474 assert(LD
->getAddressingMode() == ISD::PRE_INC
&&
7475 "Non-pre-inc AM on PPC?");
7476 RLI
.Ptr
= DAG
.getNode(ISD::ADD
, dl
, RLI
.Ptr
.getValueType(), RLI
.Ptr
,
7480 RLI
.Chain
= LD
->getChain();
7481 RLI
.MPI
= LD
->getPointerInfo();
7482 RLI
.IsDereferenceable
= LD
->isDereferenceable();
7483 RLI
.IsInvariant
= LD
->isInvariant();
7484 RLI
.Alignment
= LD
->getAlignment();
7485 RLI
.AAInfo
= LD
->getAAInfo();
7486 RLI
.Ranges
= LD
->getRanges();
7488 RLI
.ResChain
= SDValue(LD
, LD
->isIndexed() ? 2 : 1);
7492 // Given the head of the old chain, ResChain, insert a token factor containing
7493 // it and NewResChain, and make users of ResChain now be users of that token
7495 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
7496 void PPCTargetLowering::spliceIntoChain(SDValue ResChain
,
7497 SDValue NewResChain
,
7498 SelectionDAG
&DAG
) const {
7502 SDLoc
dl(NewResChain
);
7504 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
7505 NewResChain
, DAG
.getUNDEF(MVT::Other
));
7506 assert(TF
.getNode() != NewResChain
.getNode() &&
7507 "A new TF really is required here");
7509 DAG
.ReplaceAllUsesOfValueWith(ResChain
, TF
);
7510 DAG
.UpdateNodeOperands(TF
.getNode(), ResChain
, NewResChain
);
7513 /// Analyze profitability of direct move
7514 /// prefer float load to int load plus direct move
7515 /// when there is no integer use of int load
7516 bool PPCTargetLowering::directMoveIsProfitable(const SDValue
&Op
) const {
7517 SDNode
*Origin
= Op
.getOperand(0).getNode();
7518 if (Origin
->getOpcode() != ISD::LOAD
)
7521 // If there is no LXSIBZX/LXSIHZX, like Power8,
7522 // prefer direct move if the memory size is 1 or 2 bytes.
7523 MachineMemOperand
*MMO
= cast
<LoadSDNode
>(Origin
)->getMemOperand();
7524 if (!Subtarget
.hasP9Vector() && MMO
->getSize() <= 2)
7527 for (SDNode::use_iterator UI
= Origin
->use_begin(),
7528 UE
= Origin
->use_end();
7531 // Only look at the users of the loaded value.
7532 if (UI
.getUse().get().getResNo() != 0)
7535 if (UI
->getOpcode() != ISD::SINT_TO_FP
&&
7536 UI
->getOpcode() != ISD::UINT_TO_FP
)
7543 /// Custom lowers integer to floating point conversions to use
7544 /// the direct move instructions available in ISA 2.07 to avoid the
7545 /// need for load/store combinations.
7546 SDValue
PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op
,
7548 const SDLoc
&dl
) const {
7549 assert((Op
.getValueType() == MVT::f32
||
7550 Op
.getValueType() == MVT::f64
) &&
7551 "Invalid floating point type as target of conversion");
7552 assert(Subtarget
.hasFPCVT() &&
7553 "Int to FP conversions with direct moves require FPCVT");
7555 SDValue Src
= Op
.getOperand(0);
7556 bool SinglePrec
= Op
.getValueType() == MVT::f32
;
7557 bool WordInt
= Src
.getSimpleValueType().SimpleTy
== MVT::i32
;
7558 bool Signed
= Op
.getOpcode() == ISD::SINT_TO_FP
;
7559 unsigned ConvOp
= Signed
? (SinglePrec
? PPCISD::FCFIDS
: PPCISD::FCFID
) :
7560 (SinglePrec
? PPCISD::FCFIDUS
: PPCISD::FCFIDU
);
7563 FP
= DAG
.getNode(Signed
? PPCISD::MTVSRA
: PPCISD::MTVSRZ
,
7565 FP
= DAG
.getNode(ConvOp
, dl
, SinglePrec
? MVT::f32
: MVT::f64
, FP
);
7568 FP
= DAG
.getNode(PPCISD::MTVSRA
, dl
, MVT::f64
, Src
);
7569 FP
= DAG
.getNode(ConvOp
, dl
, SinglePrec
? MVT::f32
: MVT::f64
, FP
);
7575 static SDValue
widenVec(SelectionDAG
&DAG
, SDValue Vec
, const SDLoc
&dl
) {
7577 EVT VecVT
= Vec
.getValueType();
7578 assert(VecVT
.isVector() && "Expected a vector type.");
7579 assert(VecVT
.getSizeInBits() < 128 && "Vector is already full width.");
7581 EVT EltVT
= VecVT
.getVectorElementType();
7582 unsigned WideNumElts
= 128 / EltVT
.getSizeInBits();
7583 EVT WideVT
= EVT::getVectorVT(*DAG
.getContext(), EltVT
, WideNumElts
);
7585 unsigned NumConcat
= WideNumElts
/ VecVT
.getVectorNumElements();
7586 SmallVector
<SDValue
, 16> Ops(NumConcat
);
7588 SDValue UndefVec
= DAG
.getUNDEF(VecVT
);
7589 for (unsigned i
= 1; i
< NumConcat
; ++i
)
7592 return DAG
.getNode(ISD::CONCAT_VECTORS
, dl
, WideVT
, Ops
);
7595 SDValue
PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op
, SelectionDAG
&DAG
,
7596 const SDLoc
&dl
) const {
7598 unsigned Opc
= Op
.getOpcode();
7599 assert((Opc
== ISD::UINT_TO_FP
|| Opc
== ISD::SINT_TO_FP
) &&
7600 "Unexpected conversion type");
7601 assert((Op
.getValueType() == MVT::v2f64
|| Op
.getValueType() == MVT::v4f32
) &&
7602 "Supports conversions to v2f64/v4f32 only.");
7604 bool SignedConv
= Opc
== ISD::SINT_TO_FP
;
7605 bool FourEltRes
= Op
.getValueType() == MVT::v4f32
;
7607 SDValue Wide
= widenVec(DAG
, Op
.getOperand(0), dl
);
7608 EVT WideVT
= Wide
.getValueType();
7609 unsigned WideNumElts
= WideVT
.getVectorNumElements();
7610 MVT IntermediateVT
= FourEltRes
? MVT::v4i32
: MVT::v2i64
;
7612 SmallVector
<int, 16> ShuffV
;
7613 for (unsigned i
= 0; i
< WideNumElts
; ++i
)
7614 ShuffV
.push_back(i
+ WideNumElts
);
7616 int Stride
= FourEltRes
? WideNumElts
/ 4 : WideNumElts
/ 2;
7617 int SaveElts
= FourEltRes
? 4 : 2;
7618 if (Subtarget
.isLittleEndian())
7619 for (int i
= 0; i
< SaveElts
; i
++)
7620 ShuffV
[i
* Stride
] = i
;
7622 for (int i
= 1; i
<= SaveElts
; i
++)
7623 ShuffV
[i
* Stride
- 1] = i
- 1;
7625 SDValue ShuffleSrc2
=
7626 SignedConv
? DAG
.getUNDEF(WideVT
) : DAG
.getConstant(0, dl
, WideVT
);
7627 SDValue Arrange
= DAG
.getVectorShuffle(WideVT
, dl
, Wide
, ShuffleSrc2
, ShuffV
);
7629 SignedConv
? (unsigned)PPCISD::SExtVElems
: (unsigned)ISD::BITCAST
;
7632 if (!Subtarget
.hasP9Altivec() && SignedConv
) {
7633 Arrange
= DAG
.getBitcast(IntermediateVT
, Arrange
);
7634 Extend
= DAG
.getNode(ISD::SIGN_EXTEND_INREG
, dl
, IntermediateVT
, Arrange
,
7635 DAG
.getValueType(Op
.getOperand(0).getValueType()));
7637 Extend
= DAG
.getNode(ExtendOp
, dl
, IntermediateVT
, Arrange
);
7639 return DAG
.getNode(Opc
, dl
, Op
.getValueType(), Extend
);
// LowerINT_TO_FP - Custom-lower an integer-to-floating-point conversion node.
// Vector floating-point results whose input type is marked custom are handed
// to LowerINT_TO_FPVector; QPX v4i1 inputs are converted with QBFLT followed
// by an FMA; i1 inputs become a SELECT between 1.0 and 0.0; and subtargets
// where a direct move is available and profitable use
// LowerINT_TO_FPDirectMove. The remaining i64 and i32 inputs are placed in a
// floating-point register (by reusing a load, going through a stack slot, or
// bitcasting) and converted with the FCFOp node chosen below.
// NOTE(review): several lines of this function (local declarations, some
// returns and the closing braces) are not visible in this listing.
7642 SDValue
PPCTargetLowering::LowerINT_TO_FP(SDValue Op
,
7643 SelectionDAG
&DAG
) const {
7646 EVT InVT
= Op
.getOperand(0).getValueType();
7647 EVT OutVT
= Op
.getValueType();
7648 if (OutVT
.isVector() && OutVT
.isFloatingPoint() &&
7649 isOperationCustom(Op
.getOpcode(), InVT
))
7650 return LowerINT_TO_FPVector(Op
, DAG
, dl
);
7652 // Conversions to f128 are legal.
7653 if (EnableQuadPrecision
&& (Op
.getValueType() == MVT::f128
))
// QPX boolean vectors: only v4f32 and v4f64 results are handled here.
7656 if (Subtarget
.hasQPX() && Op
.getOperand(0).getValueType() == MVT::v4i1
) {
7657 if (Op
.getValueType() != MVT::v4f32
&& Op
.getValueType() != MVT::v4f64
)
7660 SDValue Value
= Op
.getOperand(0);
7661 // The values are now known to be -1 (false) or 1 (true). To convert this
7662 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
7663 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
7664 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
7666 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
7668 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
// The FMA was done in v4f64; round when the requested result type differs.
7670 if (Op
.getValueType() != MVT::v4f64
)
7671 Value
= DAG
.getNode(ISD::FP_ROUND
, dl
,
7672 Op
.getValueType(), Value
,
7673 DAG
.getIntPtrConstant(1, dl
));
7677 // Don't handle ppc_fp128 here; let it be lowered to a libcall.
7678 if (Op
.getValueType() != MVT::f32
&& Op
.getValueType() != MVT::f64
)
// An i1 input is converted by selecting between the constants 1.0 and 0.0.
7681 if (Op
.getOperand(0).getValueType() == MVT::i1
)
7682 return DAG
.getNode(ISD::SELECT
, dl
, Op
.getValueType(), Op
.getOperand(0),
7683 DAG
.getConstantFP(1.0, dl
, Op
.getValueType()),
7684 DAG
.getConstantFP(0.0, dl
, Op
.getValueType()));
7686 // If we have direct moves, we can do all the conversion, skip the store/load
7687 // however, without FPCVT we can't do most conversions.
7688 if (Subtarget
.hasDirectMove() && directMoveIsProfitable(Op
) &&
7689 Subtarget
.isPPC64() && Subtarget
.hasFPCVT())
7690 return LowerINT_TO_FPDirectMove(Op
, DAG
, dl
);
7692 assert((Op
.getOpcode() == ISD::SINT_TO_FP
|| Subtarget
.hasFPCVT()) &&
7693 "UINT_TO_FP is supported only with FPCVT");
7695 // If we have FCFIDS, then use it when converting to single-precision.
7696 // Otherwise, convert to double-precision and then round.
7697 unsigned FCFOp
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
7698 ? (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDUS
7700 : (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDU
7702 MVT FCFTy
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
// 64-bit integer input.
7706 if (Op
.getOperand(0).getValueType() == MVT::i64
) {
7707 SDValue SINT
= Op
.getOperand(0);
7708 // When converting to single-precision, we actually need to convert
7709 // to double-precision first and then round to single-precision.
7710 // To avoid double-rounding effects during that operation, we have
7711 // to prepare the input operand. Bits that might be truncated when
7712 // converting to double-precision are replaced by a bit that won't
7713 // be lost at this stage, but is below the single-precision rounding
7716 // However, if -enable-unsafe-fp-math is in effect, accept double
7717 // rounding to avoid the extra overhead.
7718 if (Op
.getValueType() == MVT::f32
&&
7719 !Subtarget
.hasFPCVT() &&
7720 !DAG
.getTarget().Options
.UnsafeFPMath
) {
7722 // Twiddle input to make sure the low 11 bits are zero. (If this
7723 // is the case, we are guaranteed the value will fit into the 53 bit
7724 // mantissa of an IEEE double-precision value without rounding.)
7725 // If any of those low 11 bits were not zero originally, make sure
7726 // bit 12 (value 2048) is set instead, so that the final rounding
7727 // to single-precision gets the correct result.
// Round = (((SINT & 2047) + 2047) | SINT) & -2048
7728 SDValue Round
= DAG
.getNode(ISD::AND
, dl
, MVT::i64
,
7729 SINT
, DAG
.getConstant(2047, dl
, MVT::i64
));
7730 Round
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
,
7731 Round
, DAG
.getConstant(2047, dl
, MVT::i64
));
7732 Round
= DAG
.getNode(ISD::OR
, dl
, MVT::i64
, Round
, SINT
);
7733 Round
= DAG
.getNode(ISD::AND
, dl
, MVT::i64
,
7734 Round
, DAG
.getConstant(-2048, dl
, MVT::i64
));
7736 // However, we cannot use that value unconditionally: if the magnitude
7737 // of the input value is small, the bit-twiddling we did above might
7738 // end up visibly changing the output. Fortunately, in that case, we
7739 // don't need to twiddle bits since the original input will convert
7740 // exactly to double-precision floating-point already. Therefore,
7741 // construct a conditional to use the original value if the top 11
7742 // bits are all sign-bit copies, and use the rounded value computed
// (SINT >> 53) + 1 is 0 or 1 exactly when bits 63..53 all equal the sign
// bit, so the unsigned "> 1" test is true only for larger magnitudes.
7744 SDValue Cond
= DAG
.getNode(ISD::SRA
, dl
, MVT::i64
,
7745 SINT
, DAG
.getConstant(53, dl
, MVT::i32
));
7746 Cond
= DAG
.getNode(ISD::ADD
, dl
, MVT::i64
,
7747 Cond
, DAG
.getConstant(1, dl
, MVT::i64
));
7748 Cond
= DAG
.getSetCC(dl
, MVT::i32
,
7749 Cond
, DAG
.getConstant(1, dl
, MVT::i64
), ISD::SETUGT
);
7751 SINT
= DAG
.getNode(ISD::SELECT
, dl
, MVT::i64
, Cond
, Round
, SINT
);
// Get the integer bits into an f64 value (Bits). The alternatives, tried in
// order: reload the i64 as f64 from a reusable load address; reload a
// sign-/zero-extended i32 with LFIWAX/LFIWZX; store the i32 source of an
// extend to a stack slot and load it with LFIWZX/LFIWAX; or bitcast SINT.
7757 MachineFunction
&MF
= DAG
.getMachineFunction();
7758 if (canReuseLoadAddress(SINT
, MVT::i64
, RLI
, DAG
)) {
7759 Bits
= DAG
.getLoad(MVT::f64
, dl
, RLI
.Chain
, RLI
.Ptr
, RLI
.MPI
,
7760 RLI
.Alignment
, RLI
.MMOFlags(), RLI
.AAInfo
, RLI
.Ranges
);
7761 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
7762 } else if (Subtarget
.hasLFIWAX() &&
7763 canReuseLoadAddress(SINT
, MVT::i32
, RLI
, DAG
, ISD::SEXTLOAD
)) {
7764 MachineMemOperand
*MMO
=
7765 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7766 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7767 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7768 Bits
= DAG
.getMemIntrinsicNode(PPCISD::LFIWAX
, dl
,
7769 DAG
.getVTList(MVT::f64
, MVT::Other
),
7770 Ops
, MVT::i32
, MMO
);
7771 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
7772 } else if (Subtarget
.hasFPCVT() &&
7773 canReuseLoadAddress(SINT
, MVT::i32
, RLI
, DAG
, ISD::ZEXTLOAD
)) {
7774 MachineMemOperand
*MMO
=
7775 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7776 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7777 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7778 Bits
= DAG
.getMemIntrinsicNode(PPCISD::LFIWZX
, dl
,
7779 DAG
.getVTList(MVT::f64
, MVT::Other
),
7780 Ops
, MVT::i32
, MMO
);
7781 spliceIntoChain(RLI
.ResChain
, Bits
.getValue(1), DAG
);
7782 } else if (((Subtarget
.hasLFIWAX() &&
7783 SINT
.getOpcode() == ISD::SIGN_EXTEND
) ||
7784 (Subtarget
.hasFPCVT() &&
7785 SINT
.getOpcode() == ISD::ZERO_EXTEND
)) &&
7786 SINT
.getOperand(0).getValueType() == MVT::i32
) {
7787 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7788 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
// 4-byte, 4-byte-aligned stack slot for the un-extended i32 value.
7790 int FrameIdx
= MFI
.CreateStackObject(4, 4, false);
7791 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7794 DAG
.getStore(DAG
.getEntryNode(), dl
, SINT
.getOperand(0), FIdx
,
7795 MachinePointerInfo::getFixedStack(
7796 DAG
.getMachineFunction(), FrameIdx
));
7798 assert(cast
<StoreSDNode
>(Store
)->getMemoryVT() == MVT::i32
&&
7799 "Expected an i32 store");
7804 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
7807 MachineMemOperand
*MMO
=
7808 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7809 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7810 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7811 Bits
= DAG
.getMemIntrinsicNode(SINT
.getOpcode() == ISD::ZERO_EXTEND
?
7812 PPCISD::LFIWZX
: PPCISD::LFIWAX
,
7813 dl
, DAG
.getVTList(MVT::f64
, MVT::Other
),
7814 Ops
, MVT::i32
, MMO
);
7816 Bits
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::f64
, SINT
);
// Convert the integer bits held in the f64 value with the selected node.
7818 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Bits
);
// Without FPCVT the conversion produced a double; round it for f32 results.
7820 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT())
7821 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
,
7822 MVT::f32
, FP
, DAG
.getIntPtrConstant(0, dl
));
// 32-bit integer input.
7826 assert(Op
.getOperand(0).getValueType() == MVT::i32
&&
7827 "Unhandled INT_TO_FP type in custom expander!");
7828 // Since we only generate this in 64-bit mode, we can take advantage of
7829 // 64-bit registers. In particular, sign extend the input value into the
7830 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
7831 // then lfd it and fcfid it.
7832 MachineFunction
&MF
= DAG
.getMachineFunction();
7833 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
7834 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
// With LFIWAX or FPCVT the 32-bit word is loaded straight into an FP register
// (LFIWZX for unsigned, LFIWAX for signed), from a reusable load address when
// one exists and otherwise from a fresh 4-byte stack slot.
7837 if (Subtarget
.hasLFIWAX() || Subtarget
.hasFPCVT()) {
7840 if (!(ReusingLoad
= canReuseLoadAddress(Op
.getOperand(0), MVT::i32
, RLI
,
7842 int FrameIdx
= MFI
.CreateStackObject(4, 4, false);
7843 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7846 DAG
.getStore(DAG
.getEntryNode(), dl
, Op
.getOperand(0), FIdx
,
7847 MachinePointerInfo::getFixedStack(
7848 DAG
.getMachineFunction(), FrameIdx
));
7850 assert(cast
<StoreSDNode
>(Store
)->getMemoryVT() == MVT::i32
&&
7851 "Expected an i32 store");
7856 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
7860 MachineMemOperand
*MMO
=
7861 MF
.getMachineMemOperand(RLI
.MPI
, MachineMemOperand::MOLoad
, 4,
7862 RLI
.Alignment
, RLI
.AAInfo
, RLI
.Ranges
);
7863 SDValue Ops
[] = { RLI
.Chain
, RLI
.Ptr
};
7864 Ld
= DAG
.getMemIntrinsicNode(Op
.getOpcode() == ISD::UINT_TO_FP
?
7865 PPCISD::LFIWZX
: PPCISD::LFIWAX
,
7866 dl
, DAG
.getVTList(MVT::f64
, MVT::Other
),
7867 Ops
, MVT::i32
, MMO
);
7869 spliceIntoChain(RLI
.ResChain
, Ld
.getValue(1), DAG
);
// Fallback: sign-extend to i64, spill to an 8-byte slot and reload as f64.
7871 assert(Subtarget
.isPPC64() &&
7872 "i32->FP without LFIWAX supported only on PPC64");
7874 int FrameIdx
= MFI
.CreateStackObject(8, 8, false);
7875 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
7877 SDValue Ext64
= DAG
.getNode(ISD::SIGN_EXTEND
, dl
, MVT::i64
,
7880 // STD the extended value into the stack slot.
7881 SDValue Store
= DAG
.getStore(
7882 DAG
.getEntryNode(), dl
, Ext64
, FIdx
,
7883 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
));
7885 // Load the value as a double.
7887 MVT::f64
, dl
, Store
, FIdx
,
7888 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
));
7891 // FCFID it and return it.
7892 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Ld
);
7893 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT())
7894 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
, MVT::f32
, FP
,
7895 DAG
.getIntPtrConstant(0, dl
));
7899 SDValue
PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op
,
7900 SelectionDAG
&DAG
) const {
7903 The rounding mode is in bits 30:31 of FPSR, and has the following
7910 FLT_ROUNDS, on the other hand, expects the following:
7917 To perform the conversion, we do:
7918 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
7921 MachineFunction
&MF
= DAG
.getMachineFunction();
7922 EVT VT
= Op
.getValueType();
7923 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
7925 // Save FP Control Word to register
7927 MVT::f64
, // return register
7928 MVT::Glue
// unused in this context
7930 SDValue Chain
= DAG
.getNode(PPCISD::MFFS
, dl
, NodeTys
, None
);
7932 // Save FP register to stack slot
7933 int SSFI
= MF
.getFrameInfo().CreateStackObject(8, 8, false);
7934 SDValue StackSlot
= DAG
.getFrameIndex(SSFI
, PtrVT
);
7935 SDValue Store
= DAG
.getStore(DAG
.getEntryNode(), dl
, Chain
, StackSlot
,
7936 MachinePointerInfo());
7938 // Load FP Control Word from low 32 bits of stack slot.
7939 SDValue Four
= DAG
.getConstant(4, dl
, PtrVT
);
7940 SDValue Addr
= DAG
.getNode(ISD::ADD
, dl
, PtrVT
, StackSlot
, Four
);
7941 SDValue CWD
= DAG
.getLoad(MVT::i32
, dl
, Store
, Addr
, MachinePointerInfo());
7943 // Transform as necessary
7945 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
7946 CWD
, DAG
.getConstant(3, dl
, MVT::i32
));
7948 DAG
.getNode(ISD::SRL
, dl
, MVT::i32
,
7949 DAG
.getNode(ISD::AND
, dl
, MVT::i32
,
7950 DAG
.getNode(ISD::XOR
, dl
, MVT::i32
,
7951 CWD
, DAG
.getConstant(3, dl
, MVT::i32
)),
7952 DAG
.getConstant(3, dl
, MVT::i32
)),
7953 DAG
.getConstant(1, dl
, MVT::i32
));
7956 DAG
.getNode(ISD::XOR
, dl
, MVT::i32
, CWD1
, CWD2
);
7958 return DAG
.getNode((VT
.getSizeInBits() < 16 ?
7959 ISD::TRUNCATE
: ISD::ZERO_EXTEND
), dl
, VT
, RetVal
);
// LowerSHL_PARTS - Expand a left shift of a value held as a (Lo, Hi) pair.
// Operand 0 is the low half, operand 1 the high half and operand 2 the shift
// amount. The merged result is { OutLo, OutHi } with
//   OutLo = Lo << Amt
//   OutHi = (Hi << Amt) | (Lo >> (BitWidth - Amt)) | (Lo << (Amt - BitWidth))
// where the shifts are PPCISD::SHL / PPCISD::SRL nodes.
// NOTE(review): the declaration of dl and the tail of the assert are not
// visible in this listing.
7962 SDValue
PPCTargetLowering::LowerSHL_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
7963 EVT VT
= Op
.getValueType();
7964 unsigned BitWidth
= VT
.getSizeInBits();
7966 assert(Op
.getNumOperands() == 3 &&
7967 VT
== Op
.getOperand(1).getValueType() &&
7970 // Expand into a bunch of logical ops. Note that these ops
7971 // depend on the PPC behavior for oversized shift amounts.
7972 SDValue Lo
= Op
.getOperand(0);
7973 SDValue Hi
= Op
.getOperand(1);
7974 SDValue Amt
= Op
.getOperand(2);
7975 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt
7977 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
7978 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
7979 SDValue Tmp2
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Amt
);
7980 SDValue Tmp3
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Tmp1
);
7981 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
// Tmp5 = Amt - BitWidth
7982 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
7983 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
7984 SDValue Tmp6
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Lo
, Tmp5
);
7985 SDValue OutHi
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp4
, Tmp6
);
7986 SDValue OutLo
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Lo
, Amt
);
7987 SDValue OutOps
[] = { OutLo
, OutHi
};
7988 return DAG
.getMergeValues(OutOps
, dl
);
// LowerSRL_PARTS - Expand a logical right shift of a value held as a
// (Lo, Hi) pair. Operand 0 is the low half, operand 1 the high half and
// operand 2 the shift amount. The merged result is { OutLo, OutHi } with
//   OutLo = (Lo >> Amt) | (Hi << (BitWidth - Amt)) | (Hi >> (Amt - BitWidth))
//   OutHi = Hi >> Amt
// where the shifts are PPCISD::SRL / PPCISD::SHL nodes.
// NOTE(review): the declaration of dl and the tail of the assert are not
// visible in this listing.
7991 SDValue
PPCTargetLowering::LowerSRL_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
7992 EVT VT
= Op
.getValueType();
7994 unsigned BitWidth
= VT
.getSizeInBits();
7995 assert(Op
.getNumOperands() == 3 &&
7996 VT
== Op
.getOperand(1).getValueType() &&
7999 // Expand into a bunch of logical ops. Note that these ops
8000 // depend on the PPC behavior for oversized shift amounts.
8001 SDValue Lo
= Op
.getOperand(0);
8002 SDValue Hi
= Op
.getOperand(1);
8003 SDValue Amt
= Op
.getOperand(2);
8004 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt
8006 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
8007 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
8008 SDValue Tmp2
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Amt
);
8009 SDValue Tmp3
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Tmp1
);
8010 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
// Tmp5 = Amt - BitWidth
8011 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
8012 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
8013 SDValue Tmp6
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Hi
, Tmp5
);
8014 SDValue OutLo
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp4
, Tmp6
);
8015 SDValue OutHi
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Hi
, Amt
);
8016 SDValue OutOps
[] = { OutLo
, OutHi
};
8017 return DAG
.getMergeValues(OutOps
, dl
);
// LowerSRA_PARTS - Expand an arithmetic right shift of a value held as a
// (Lo, Hi) pair. Operand 0 is the low half, operand 1 the high half and
// operand 2 the shift amount. The merged result is { OutLo, OutHi } with
//   OutHi = Hi >>s Amt
//   OutLo = (Amt - BitWidth <= 0)
//             ? (Lo >>u Amt) | (Hi << (BitWidth - Amt))
//             : Hi >>s (Amt - BitWidth)
// NOTE(review): the declaration of dl and the tail of the assert are not
// visible in this listing.
8020 SDValue
PPCTargetLowering::LowerSRA_PARTS(SDValue Op
, SelectionDAG
&DAG
) const {
8022 EVT VT
= Op
.getValueType();
8023 unsigned BitWidth
= VT
.getSizeInBits();
8024 assert(Op
.getNumOperands() == 3 &&
8025 VT
== Op
.getOperand(1).getValueType() &&
8028 // Expand into a bunch of logical ops, followed by a select_cc.
8029 SDValue Lo
= Op
.getOperand(0);
8030 SDValue Hi
= Op
.getOperand(1);
8031 SDValue Amt
= Op
.getOperand(2);
8032 EVT AmtVT
= Amt
.getValueType();
// Tmp1 = BitWidth - Amt
8034 SDValue Tmp1
= DAG
.getNode(ISD::SUB
, dl
, AmtVT
,
8035 DAG
.getConstant(BitWidth
, dl
, AmtVT
), Amt
);
8036 SDValue Tmp2
= DAG
.getNode(PPCISD::SRL
, dl
, VT
, Lo
, Amt
);
8037 SDValue Tmp3
= DAG
.getNode(PPCISD::SHL
, dl
, VT
, Hi
, Tmp1
);
8038 SDValue Tmp4
= DAG
.getNode(ISD::OR
, dl
, VT
, Tmp2
, Tmp3
);
// Tmp5 = Amt - BitWidth.
// NOTE(review): BitWidth is unsigned, so -BitWidth wraps modulo 2^32 before
// it is widened for getConstant. The signed SETLE compare below relies on
// Tmp5 being negative for in-range amounts; confirm AmtVT is never wider
// than 32 bits.
8039 SDValue Tmp5
= DAG
.getNode(ISD::ADD
, dl
, AmtVT
, Amt
,
8040 DAG
.getConstant(-BitWidth
, dl
, AmtVT
));
8041 SDValue Tmp6
= DAG
.getNode(PPCISD::SRA
, dl
, VT
, Hi
, Tmp5
);
8042 SDValue OutHi
= DAG
.getNode(PPCISD::SRA
, dl
, VT
, Hi
, Amt
);
// Use the combined Lo/Hi bits while Amt <= BitWidth, otherwise the
// sign-propagating shift of Hi.
8043 SDValue OutLo
= DAG
.getSelectCC(dl
, Tmp5
, DAG
.getConstant(0, dl
, AmtVT
),
8044 Tmp4
, Tmp6
, ISD::SETLE
);
8045 SDValue OutOps
[] = { OutLo
, OutHi
};
8046 return DAG
.getMergeValues(OutOps
, dl
);
8049 //===----------------------------------------------------------------------===//
8050 // Vector related lowering.
8053 /// BuildSplatI - Build a canonical splati of Val with an element size of
8054 /// SplatSize. Cast the result to VT.
// Val must lie in [-16, 15] (asserted below). SplatSize is used, minus one,
// as an index into the VTys table. When VT is MVT::Other the table entry for
// SplatSize is used as the result type. The splat constant is created in the
// table's type for SplatSize and then bitcast to the requested type.
// NOTE(review): the end of the VTys initializer and the statement that goes
// with the "Force vspltis[hw] -1" comment are not visible in this listing.
8055 static SDValue
BuildSplatI(int Val
, unsigned SplatSize
, EVT VT
,
8056 SelectionDAG
&DAG
, const SDLoc
&dl
) {
8057 assert(Val
>= -16 && Val
<= 15 && "vsplti is out of range!");
8059 static const MVT VTys
[] = { // canonical VT to use for each size.
8060 MVT::v16i8
, MVT::v8i16
, MVT::Other
, MVT::v4i32
8063 EVT ReqVT
= VT
!= MVT::Other
? VT
: VTys
[SplatSize
-1];
8065 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
8069 EVT CanonicalVT
= VTys
[SplatSize
-1];
8071 // Build a canonical splat for this value.
8072 return DAG
.getBitcast(ReqVT
, DAG
.getConstant(Val
, dl
, CanonicalVT
));
8075 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8076 /// specified intrinsic ID.
8077 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue Op
, SelectionDAG
&DAG
,
8078 const SDLoc
&dl
, EVT DestVT
= MVT::Other
) {
8079 if (DestVT
== MVT::Other
) DestVT
= Op
.getValueType();
8080 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8081 DAG
.getConstant(IID
, dl
, MVT::i32
), Op
);
8084 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8085 /// specified intrinsic ID.
8086 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue LHS
, SDValue RHS
,
8087 SelectionDAG
&DAG
, const SDLoc
&dl
,
8088 EVT DestVT
= MVT::Other
) {
8089 if (DestVT
== MVT::Other
) DestVT
= LHS
.getValueType();
8090 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8091 DAG
.getConstant(IID
, dl
, MVT::i32
), LHS
, RHS
);
8094 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8095 /// specified intrinsic ID.
8096 static SDValue
BuildIntrinsicOp(unsigned IID
, SDValue Op0
, SDValue Op1
,
8097 SDValue Op2
, SelectionDAG
&DAG
, const SDLoc
&dl
,
8098 EVT DestVT
= MVT::Other
) {
8099 if (DestVT
== MVT::Other
) DestVT
= Op0
.getValueType();
8100 return DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, DestVT
,
8101 DAG
.getConstant(IID
, dl
, MVT::i32
), Op0
, Op1
, Op2
);
8104 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8105 /// amount. The result has the specified value type.
// Both inputs are bitcast to v16i8, shuffled with the 16-entry mask Ops, and
// the shuffle result is bitcast to VT.
// NOTE(review): the declaration of Ops and the body of the loop that fills
// it are not visible in this listing.
8106 static SDValue
BuildVSLDOI(SDValue LHS
, SDValue RHS
, unsigned Amt
, EVT VT
,
8107 SelectionDAG
&DAG
, const SDLoc
&dl
) {
8108 // Force LHS/RHS to be the right type.
8109 LHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, LHS
);
8110 RHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, RHS
);
8113 for (unsigned i
= 0; i
!= 16; ++i
)
8115 SDValue T
= DAG
.getVectorShuffle(MVT::v16i8
, dl
, LHS
, RHS
, Ops
);
8116 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, T
);
8119 /// Do we have an efficient pattern in a .td file for this node?
8121 /// \param V - pointer to the BuildVectorSDNode being matched
8122 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8124 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8125 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8126 /// the opposite is true (expansion is beneficial) are:
8127 /// - The node builds a vector out of integers that are not 32 or 64-bits
8128 /// - The node builds a vector out of constants
8129 /// - The node is a "load-and-splat"
8130 /// In all other cases, we will choose to keep the BUILD_VECTOR.
// The final result is false only when the operand scan leaves both IsSplat
// and IsLoad set, i.e. the node was recognised as a load-and-splat.
// NOTE(review): the rest of the parameter list and the statements guarded by
// several of the conditions below are not visible in this listing.
8131 static bool haveEfficientBuildVectorPattern(BuildVectorSDNode
*V
,
8134 EVT VecVT
= V
->getValueType(0);
// Types considered: v2f64 always, v4f32 with P8 vector support, and
// v2i64 / v4i32 with direct moves.
8135 bool RightType
= VecVT
== MVT::v2f64
||
8136 (HasP8Vector
&& VecVT
== MVT::v4f32
) ||
8137 (HasDirectMove
&& (VecVT
== MVT::v2i64
|| VecVT
== MVT::v4i32
));
8141 bool IsSplat
= true;
8142 bool IsLoad
= false;
8143 SDValue Op0
= V
->getOperand(0);
8145 // This function is called in a block that confirms the node is not a constant
8146 // splat. So a constant BUILD_VECTOR here means the vector is built out of
8147 // different constants.
8148 if (V
->isConstant())
8150 for (int i
= 0, e
= V
->getNumOperands(); i
< e
; ++i
) {
8151 if (V
->getOperand(i
).isUndef())
8153 // We want to expand nodes that represent load-and-splat even if the
8154 // loaded value is a floating point truncation or conversion to int.
8155 if (V
->getOperand(i
).getOpcode() == ISD::LOAD
||
8156 (V
->getOperand(i
).getOpcode() == ISD::FP_ROUND
&&
8157 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
) ||
8158 (V
->getOperand(i
).getOpcode() == ISD::FP_TO_SINT
&&
8159 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
) ||
8160 (V
->getOperand(i
).getOpcode() == ISD::FP_TO_UINT
&&
8161 V
->getOperand(i
).getOperand(0).getOpcode() == ISD::LOAD
))
8163 // If the operands are different or the input is not a load and has more
8164 // uses than just this BV node, then it isn't a splat.
8165 if (V
->getOperand(i
) != Op0
||
8166 (!IsLoad
&& !V
->isOnlyUserOf(V
->getOperand(i
).getNode())))
8169 return !(IsSplat
&& IsLoad
);
8172 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
// The guard below is true when the pattern does not apply: quad precision is
// disabled, the result type is not f128, the operand is not a BUILD_PAIR, or
// either half of the pair is not i64. Otherwise a PPCISD::BUILD_FP128 node is
// built from the halves of the pair.
// NOTE(review): the declaration of dl, the statement executed when the guard
// is true, and the end of the final call are not visible in this listing.
8173 SDValue
PPCTargetLowering::LowerBITCAST(SDValue Op
, SelectionDAG
&DAG
) const {
8176 SDValue Op0
= Op
->getOperand(0);
8178 if (!EnableQuadPrecision
||
8179 (Op
.getValueType() != MVT::f128
) ||
8180 (Op0
.getOpcode() != ISD::BUILD_PAIR
) ||
8181 (Op0
.getOperand(0).getValueType() != MVT::i64
) ||
8182 (Op0
.getOperand(1).getValueType() != MVT::i64
))
8185 return DAG
.getNode(PPCISD::BUILD_FP128
, dl
, MVT::f128
, Op0
.getOperand(0),
8189 // If this is a case we can't handle, return null and let the default
8190 // expansion code take care of it. If we CAN select this case, and if it
8191 // selects to a single instruction, return Op. Otherwise, if we can codegen
8192 // this case more efficiently than a constant pool load, lower it to the
8193 // sequence of ops that should be used.
//
// Outline of the visible code:
//  * QPX v4i1: an all-constant vector is loaded from a constant pool of
//    floats with QVLFSb; otherwise the elements are stored as i32 words to a
//    16-byte stack slot, reloaded with qvlfiwz, converted with qvfcfidu and
//    compared against zero.
//  * Constant splats of at most 32 bits are matched against a list of short
//    sequences: an all-zero vector, XXSPLTIB for byte splats on P9, vsplti,
//    the VADD_SPLAT pseudo, vsplti combined with a shift/rotate by itself,
//    and vsplti followed by vsldoi.
// NOTE(review): a number of statements, returns and closing braces of this
// function are not visible in this listing.
8194 SDValue
PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op
,
8195 SelectionDAG
&DAG
) const {
8197 BuildVectorSDNode
*BVN
= dyn_cast
<BuildVectorSDNode
>(Op
.getNode());
8198 assert(BVN
&& "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
8200 if (Subtarget
.hasQPX() && Op
.getValueType() == MVT::v4i1
) {
8201 // We first build an i32 vector, load it into a QPX register,
8202 // then convert it to a floating-point vector and compare it
8203 // to a zero vector to get the boolean result.
8204 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
8205 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
8206 MachinePointerInfo PtrInfo
=
8207 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
8208 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
8209 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
8211 assert(BVN
->getNumOperands() == 4 &&
8212 "BUILD_VECTOR for v4i1 does not have 4 operands");
// Scan the four operands: undef entries are skipped, and any entry that is
// not a ConstantSDNode is treated specially (handling not visible here).
8214 bool IsConst
= true;
8215 for (unsigned i
= 0; i
< 4; ++i
) {
8216 if (BVN
->getOperand(i
).isUndef()) continue;
8217 if (!isa
<ConstantSDNode
>(BVN
->getOperand(i
))) {
8225 ConstantFP::get(Type::getFloatTy(*DAG
.getContext()), 1.0);
8227 ConstantFP::get(Type::getFloatTy(*DAG
.getContext()), -1.0);
// Build the constant-pool vector element by element.
8230 for (unsigned i
= 0; i
< 4; ++i
) {
8231 if (BVN
->getOperand(i
).isUndef())
8232 CV
[i
] = UndefValue::get(Type::getFloatTy(*DAG
.getContext()));
8233 else if (isNullConstant(BVN
->getOperand(i
)))
8239 Constant
*CP
= ConstantVector::get(CV
);
8240 SDValue CPIdx
= DAG
.getConstantPool(CP
, getPointerTy(DAG
.getDataLayout()),
8241 16 /* alignment */);
8243 SDValue Ops
[] = {DAG
.getEntryNode(), CPIdx
};
8244 SDVTList VTs
= DAG
.getVTList({MVT::v4i1
, /*chain*/ MVT::Other
});
8245 return DAG
.getMemIntrinsicNode(
8246 PPCISD::QVLFSb
, dl
, VTs
, Ops
, MVT::v4f32
,
8247 MachinePointerInfo::getConstantPool(DAG
.getMachineFunction()));
// Non-constant case: store each defined element as a 32-bit word at byte
// offset 4*i of the stack slot.
8250 SmallVector
<SDValue
, 4> Stores
;
8251 for (unsigned i
= 0; i
< 4; ++i
) {
8252 if (BVN
->getOperand(i
).isUndef()) continue;
8254 unsigned Offset
= 4*i
;
8255 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
8256 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
// Elements wider than 4 bytes are stored with a truncating i32 store.
8258 unsigned StoreSize
= BVN
->getOperand(i
).getValueType().getStoreSize();
8259 if (StoreSize
> 4) {
8261 DAG
.getTruncStore(DAG
.getEntryNode(), dl
, BVN
->getOperand(i
), Idx
,
8262 PtrInfo
.getWithOffset(Offset
), MVT::i32
));
8264 SDValue StoreValue
= BVN
->getOperand(i
);
8266 StoreValue
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, StoreValue
);
8268 Stores
.push_back(DAG
.getStore(DAG
.getEntryNode(), dl
, StoreValue
, Idx
,
8269 PtrInfo
.getWithOffset(Offset
)));
// Join the stores with a TokenFactor, or use the entry node if there were none.
8274 if (!Stores
.empty())
8275 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
8277 StoreChain
= DAG
.getEntryNode();
8279 // Now load from v4i32 into the QPX register; this will extend it to
8280 // v4i64 but not yet convert it to a floating point. Nevertheless, this
8281 // is typed as v4f64 because the QPX register integer states are not
8282 // explicitly represented.
8284 SDValue Ops
[] = {StoreChain
,
8285 DAG
.getConstant(Intrinsic::ppc_qpx_qvlfiwz
, dl
, MVT::i32
),
8287 SDVTList VTs
= DAG
.getVTList({MVT::v4f64
, /*chain*/ MVT::Other
});
8289 SDValue LoadedVect
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
,
8290 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
8291 LoadedVect
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
8292 DAG
.getConstant(Intrinsic::ppc_qpx_qvfcfidu
, dl
, MVT::i32
),
8295 SDValue FPZeros
= DAG
.getConstantFP(0.0, dl
, MVT::v4f64
);
8297 return DAG
.getSetCC(dl
, MVT::v4i1
, LoadedVect
, FPZeros
, ISD::SETEQ
);
8300 // All other QPX vectors are handled by generic code.
8301 if (Subtarget
.hasQPX())
8304 // Check if this is a splat of a constant value.
8305 APInt APSplatBits
, APSplatUndef
;
8306 unsigned SplatBitSize
;
8308 if (! BVN
->isConstantSplat(APSplatBits
, APSplatUndef
, SplatBitSize
,
8309 HasAnyUndefs
, 0, !Subtarget
.isLittleEndian()) ||
8310 SplatBitSize
> 32) {
8311 // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
8312 // lowered to VSX instructions under certain conditions.
8313 // Without VSX, there is no pattern more efficient than expanding the node.
8314 if (Subtarget
.hasVSX() &&
8315 haveEfficientBuildVectorPattern(BVN
, Subtarget
.hasDirectMove(),
8316 Subtarget
.hasP8Vector()))
// From here on the node is a constant splat of at most 32 bits.
8321 unsigned SplatBits
= APSplatBits
.getZExtValue();
8322 unsigned SplatUndef
= APSplatUndef
.getZExtValue();
8323 unsigned SplatSize
= SplatBitSize
/ 8;
8325 // First, handle single instruction cases.
8328 if (SplatBits
== 0) {
8329 // Canonicalize all zero vectors to be v4i32.
8330 if (Op
.getValueType() != MVT::v4i32
|| HasAnyUndefs
) {
8331 SDValue Z
= DAG
.getConstant(0, dl
, MVT::v4i32
);
8332 Op
= DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Z
);
8337 // We have XXSPLTIB for constant splats one byte wide
8338 if (Subtarget
.hasP9Vector() && SplatSize
== 1) {
8339 // This is a splat of 1-byte elements with some elements potentially undef.
8340 // Rather than trying to match undef in the SDAG patterns, ensure that all
8341 // elements are the same constant.
8342 if (HasAnyUndefs
|| ISD::isBuildVectorAllOnes(BVN
)) {
8343 SmallVector
<SDValue
, 16> Ops(16, DAG
.getConstant(SplatBits
,
8345 SDValue NewBV
= DAG
.getBuildVector(MVT::v16i8
, dl
, Ops
);
8346 if (Op
.getValueType() != MVT::v16i8
)
8347 return DAG
.getBitcast(Op
.getValueType(), NewBV
);
8351 // BuildVectorSDNode::isConstantSplat() is actually pretty smart. It'll
8352 // detect that constant splats like v8i16: 0xABAB are really just splats
8353 // of a 1-byte constant. In this case, we need to convert the node to a
8354 // splat of v16i8 and a bitcast.
8355 if (Op
.getValueType() != MVT::v16i8
)
8356 return DAG
.getBitcast(Op
.getValueType(),
8357 DAG
.getConstant(SplatBits
, dl
, MVT::v16i8
));
8362 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
8363 int32_t SextVal
= (int32_t(SplatBits
<< (32-SplatBitSize
)) >>
8365 if (SextVal
>= -16 && SextVal
<= 15)
8366 return BuildSplatI(SextVal
, SplatSize
, Op
.getValueType(), DAG
, dl
);
8368 // Two instruction sequences.
8370 // If this value is in the range [-32,30] and is even, use:
8371 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
8372 // If this value is in the range [17,31] and is odd, use:
8373 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
8374 // If this value is in the range [-31,-17] and is odd, use:
8375 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
8376 // Note the last two are three-instruction sequences.
8377 if (SextVal
>= -32 && SextVal
<= 31) {
8378 // To avoid having these optimizations undone by constant folding,
8379 // we convert to a pseudo that will be expanded later into one of
8381 SDValue Elt
= DAG
.getConstant(SextVal
, dl
, MVT::i32
);
8382 EVT VT
= (SplatSize
== 1 ? MVT::v16i8
:
8383 (SplatSize
== 2 ? MVT::v8i16
: MVT::v4i32
));
8384 SDValue EltSize
= DAG
.getConstant(SplatSize
, dl
, MVT::i32
);
8385 SDValue RetVal
= DAG
.getNode(PPCISD::VADD_SPLAT
, dl
, VT
, Elt
, EltSize
);
8386 if (VT
== Op
.getValueType())
8389 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), RetVal
);
8392 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
8393 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
8395 if (SplatSize
== 4 && SplatBits
== (0x7FFFFFFF&~SplatUndef
)) {
8396 // Make -1 and vspltisw -1:
8397 SDValue OnesV
= BuildSplatI(-1, 4, MVT::v4i32
, DAG
, dl
);
8399 // Make the VSLW intrinsic, computing 0x8000_0000.
8400 SDValue Res
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw
, OnesV
,
8403 // xor by OnesV to invert it.
8404 Res
= DAG
.getNode(ISD::XOR
, dl
, MVT::v4i32
, Res
, OnesV
);
8405 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8408 // Check to see if this is a wide variety of vsplti*, binop self cases.
8409 static const signed char SplatCsts
[] = {
8410 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
8411 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
8414 for (unsigned idx
= 0; idx
< array_lengthof(SplatCsts
); ++idx
) {
8415 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
8416 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
8417 int i
= SplatCsts
[idx
];
8419 // Figure out what shift amount will be used by altivec if shifted by i in
8421 unsigned TypeShiftAmt
= i
& (SplatBitSize
-1);
8423 // vsplti + shl self.
8424 if (SextVal
== (int)((unsigned)i
<< TypeShiftAmt
)) {
8425 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8426 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8427 Intrinsic::ppc_altivec_vslb
, Intrinsic::ppc_altivec_vslh
, 0,
8428 Intrinsic::ppc_altivec_vslw
8430 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8431 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8434 // vsplti + srl self.
8435 if (SextVal
== (int)((unsigned)i
>> TypeShiftAmt
)) {
8436 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8437 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8438 Intrinsic::ppc_altivec_vsrb
, Intrinsic::ppc_altivec_vsrh
, 0,
8439 Intrinsic::ppc_altivec_vsrw
8441 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8442 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8445 // vsplti + sra self.
// NOTE(review): this condition is textually identical to the
// "vsplti + srl self" test above, whose body returns when it matches, so
// this branch looks unreachable - confirm whether a signed shift was
// intended here.
8446 if (SextVal
== (int)((unsigned)i
>> TypeShiftAmt
)) {
8447 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8448 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8449 Intrinsic::ppc_altivec_vsrab
, Intrinsic::ppc_altivec_vsrah
, 0,
8450 Intrinsic::ppc_altivec_vsraw
8452 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8453 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
8456 // vsplti + rol self.
8457 if (SextVal
== (int)(((unsigned)i
<< TypeShiftAmt
) |
8458 ((unsigned)i
>> (SplatBitSize
-TypeShiftAmt
)))) {
8459 SDValue Res
= BuildSplatI(i
, SplatSize
, MVT::Other
, DAG
, dl
);
8460 static const unsigned IIDs
[] = { // Intrinsic to use for each size.
8461 Intrinsic::ppc_altivec_vrlb
, Intrinsic::ppc_altivec_vrlh
, 0,
8462 Intrinsic::ppc_altivec_vrlw
8464 Res
= BuildIntrinsicOp(IIDs
[SplatSize
-1], Res
, Res
, DAG
, dl
);
8465 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Res
);
// The vsldoi forms below use a shift amount of N on big-endian subtargets
// and 16 - N on little-endian ones.
8468 // t = vsplti c, result = vsldoi t, t, 1
8469 if (SextVal
== (int)(((unsigned)i
<< 8) | (i
< 0 ? 0xFF : 0))) {
8470 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8471 unsigned Amt
= Subtarget
.isLittleEndian() ? 15 : 1;
8472 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8474 // t = vsplti c, result = vsldoi t, t, 2
8475 if (SextVal
== (int)(((unsigned)i
<< 16) | (i
< 0 ? 0xFFFF : 0))) {
8476 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8477 unsigned Amt
= Subtarget
.isLittleEndian() ? 14 : 2;
8478 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8480 // t = vsplti c, result = vsldoi t, t, 3
8481 if (SextVal
== (int)(((unsigned)i
<< 24) | (i
< 0 ? 0xFFFFFF : 0))) {
8482 SDValue T
= BuildSplatI(i
, SplatSize
, MVT::v16i8
, DAG
, dl
);
8483 unsigned Amt
= Subtarget
.isLittleEndian() ? 13 : 3;
8484 return BuildVSLDOI(T
, T
, Amt
, Op
.getValueType(), DAG
, dl
);
8491 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
8492 /// the specified operations to build the shuffle.
8493 static SDValue
GeneratePerfectShuffle(unsigned PFEntry
, SDValue LHS
,
8494 SDValue RHS
, SelectionDAG
&DAG
,
8496 unsigned OpNum
= (PFEntry
>> 26) & 0x0F;
8497 unsigned LHSID
= (PFEntry
>> 13) & ((1 << 13)-1);
8498 unsigned RHSID
= (PFEntry
>> 0) & ((1 << 13)-1);
8501 OP_COPY
= 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
8513 if (OpNum
== OP_COPY
) {
8514 if (LHSID
== (1*9+2)*9+3) return LHS
;
8515 assert(LHSID
== ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
8519 SDValue OpLHS
, OpRHS
;
8520 OpLHS
= GeneratePerfectShuffle(PerfectShuffleTable
[LHSID
], LHS
, RHS
, DAG
, dl
);
8521 OpRHS
= GeneratePerfectShuffle(PerfectShuffleTable
[RHSID
], LHS
, RHS
, DAG
, dl
);
8525 default: llvm_unreachable("Unknown i32 permute!");
8527 ShufIdxs
[ 0] = 0; ShufIdxs
[ 1] = 1; ShufIdxs
[ 2] = 2; ShufIdxs
[ 3] = 3;
8528 ShufIdxs
[ 4] = 16; ShufIdxs
[ 5] = 17; ShufIdxs
[ 6] = 18; ShufIdxs
[ 7] = 19;
8529 ShufIdxs
[ 8] = 4; ShufIdxs
[ 9] = 5; ShufIdxs
[10] = 6; ShufIdxs
[11] = 7;
8530 ShufIdxs
[12] = 20; ShufIdxs
[13] = 21; ShufIdxs
[14] = 22; ShufIdxs
[15] = 23;
8533 ShufIdxs
[ 0] = 8; ShufIdxs
[ 1] = 9; ShufIdxs
[ 2] = 10; ShufIdxs
[ 3] = 11;
8534 ShufIdxs
[ 4] = 24; ShufIdxs
[ 5] = 25; ShufIdxs
[ 6] = 26; ShufIdxs
[ 7] = 27;
8535 ShufIdxs
[ 8] = 12; ShufIdxs
[ 9] = 13; ShufIdxs
[10] = 14; ShufIdxs
[11] = 15;
8536 ShufIdxs
[12] = 28; ShufIdxs
[13] = 29; ShufIdxs
[14] = 30; ShufIdxs
[15] = 31;
8539 for (unsigned i
= 0; i
!= 16; ++i
)
8540 ShufIdxs
[i
] = (i
&3)+0;
8543 for (unsigned i
= 0; i
!= 16; ++i
)
8544 ShufIdxs
[i
] = (i
&3)+4;
8547 for (unsigned i
= 0; i
!= 16; ++i
)
8548 ShufIdxs
[i
] = (i
&3)+8;
8551 for (unsigned i
= 0; i
!= 16; ++i
)
8552 ShufIdxs
[i
] = (i
&3)+12;
8555 return BuildVSLDOI(OpLHS
, OpRHS
, 4, OpLHS
.getValueType(), DAG
, dl
);
8557 return BuildVSLDOI(OpLHS
, OpRHS
, 8, OpLHS
.getValueType(), DAG
, dl
);
8559 return BuildVSLDOI(OpLHS
, OpRHS
, 12, OpLHS
.getValueType(), DAG
, dl
);
8561 EVT VT
= OpLHS
.getValueType();
8562 OpLHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OpLHS
);
8563 OpRHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OpRHS
);
8564 SDValue T
= DAG
.getVectorShuffle(MVT::v16i8
, dl
, OpLHS
, OpRHS
, ShufIdxs
);
8565 return DAG
.getNode(ISD::BITCAST
, dl
, VT
, T
);
8568 /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
8569 /// by the VINSERTB instruction introduced in ISA 3.0, else just return default
8571 SDValue
PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode
*N
,
8572 SelectionDAG
&DAG
) const {
8573 const unsigned BytesInVector
= 16;
8574 bool IsLE
= Subtarget
.isLittleEndian();
8576 SDValue V1
= N
->getOperand(0);
8577 SDValue V2
= N
->getOperand(1);
8578 unsigned ShiftElts
= 0, InsertAtByte
= 0;
8581 // Shifts required to get the byte we want at element 7.
8582 unsigned LittleEndianShifts
[] = {8, 7, 6, 5, 4, 3, 2, 1,
8583 0, 15, 14, 13, 12, 11, 10, 9};
8584 unsigned BigEndianShifts
[] = {9, 10, 11, 12, 13, 14, 15, 0,
8585 1, 2, 3, 4, 5, 6, 7, 8};
8587 ArrayRef
<int> Mask
= N
->getMask();
8588 int OriginalOrder
[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
8590 // For each mask element, find out if we're just inserting something
8591 // from V2 into V1 or vice versa.
8592 // Possible permutations inserting an element from V2 into V1:
8593 // X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8594 // 0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
8596 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
8597 // Inserting from V1 into V2 will be similar, except mask range will be
8600 bool FoundCandidate
= false;
8601 // If both vector operands for the shuffle are the same vector, the mask
8602 // will contain only elements from the first one and the second one will be
8604 unsigned VINSERTBSrcElem
= IsLE
? 8 : 7;
8605 // Go through the mask of half-words to find an element that's being moved
8606 // from one vector to the other.
8607 for (unsigned i
= 0; i
< BytesInVector
; ++i
) {
8608 unsigned CurrentElement
= Mask
[i
];
8609 // If 2nd operand is undefined, we should only look for element 7 in the
8611 if (V2
.isUndef() && CurrentElement
!= VINSERTBSrcElem
)
8614 bool OtherElementsInOrder
= true;
8615 // Examine the other elements in the Mask to see if they're in original
8617 for (unsigned j
= 0; j
< BytesInVector
; ++j
) {
8620 // If CurrentElement is from V1 [0,15], then we the rest of the Mask to be
8621 // from V2 [16,31] and vice versa. Unless the 2nd operand is undefined,
8622 // in which we always assume we're always picking from the 1st operand.
8624 (!V2
.isUndef() && CurrentElement
< BytesInVector
) ? BytesInVector
: 0;
8625 if (Mask
[j
] != OriginalOrder
[j
] + MaskOffset
) {
8626 OtherElementsInOrder
= false;
8630 // If other elements are in original order, we record the number of shifts
8631 // we need to get the element we want into element 7. Also record which byte
8632 // in the vector we should insert into.
8633 if (OtherElementsInOrder
) {
8634 // If 2nd operand is undefined, we assume no shifts and no swapping.
8639 // Only need the last 4-bits for shifts because operands will be swapped if CurrentElement is >= 2^4.
8640 ShiftElts
= IsLE
? LittleEndianShifts
[CurrentElement
& 0xF]
8641 : BigEndianShifts
[CurrentElement
& 0xF];
8642 Swap
= CurrentElement
< BytesInVector
;
8644 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) : i
;
8645 FoundCandidate
= true;
8650 if (!FoundCandidate
)
8653 // Candidate found, construct the proper SDAG sequence with VINSERTB,
8654 // optionally with VECSHL if shift is required.
8660 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v16i8
, V2
, V2
,
8661 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8662 return DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v16i8
, V1
, Shl
,
8663 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8665 return DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v16i8
, V1
, V2
,
8666 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8669 /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
8670 /// by the VINSERTH instruction introduced in ISA 3.0, else just return default
8672 SDValue
PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode
*N
,
8673 SelectionDAG
&DAG
) const {
8674 const unsigned NumHalfWords
= 8;
8675 const unsigned BytesInVector
= NumHalfWords
* 2;
8676 // Check that the shuffle is on half-words.
8677 if (!isNByteElemShuffleMask(N
, 2, 1))
8680 bool IsLE
= Subtarget
.isLittleEndian();
8682 SDValue V1
= N
->getOperand(0);
8683 SDValue V2
= N
->getOperand(1);
8684 unsigned ShiftElts
= 0, InsertAtByte
= 0;
8687 // Shifts required to get the half-word we want at element 3.
8688 unsigned LittleEndianShifts
[] = {4, 3, 2, 1, 0, 7, 6, 5};
8689 unsigned BigEndianShifts
[] = {5, 6, 7, 0, 1, 2, 3, 4};
8692 uint32_t OriginalOrderLow
= 0x1234567;
8693 uint32_t OriginalOrderHigh
= 0x89ABCDEF;
8694 // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
8695 // 32-bit space, only need 4-bit nibbles per element.
8696 for (unsigned i
= 0; i
< NumHalfWords
; ++i
) {
8697 unsigned MaskShift
= (NumHalfWords
- 1 - i
) * 4;
8698 Mask
|= ((uint32_t)(N
->getMaskElt(i
* 2) / 2) << MaskShift
);
8701 // For each mask element, find out if we're just inserting something
8702 // from V2 into V1 or vice versa. Possible permutations inserting an element
8704 // X, 1, 2, 3, 4, 5, 6, 7
8705 // 0, X, 2, 3, 4, 5, 6, 7
8706 // 0, 1, X, 3, 4, 5, 6, 7
8707 // 0, 1, 2, X, 4, 5, 6, 7
8708 // 0, 1, 2, 3, X, 5, 6, 7
8709 // 0, 1, 2, 3, 4, X, 6, 7
8710 // 0, 1, 2, 3, 4, 5, X, 7
8711 // 0, 1, 2, 3, 4, 5, 6, X
8712 // Inserting from V1 into V2 will be similar, except mask range will be [8,15].
8714 bool FoundCandidate
= false;
8715 // Go through the mask of half-words to find an element that's being moved
8716 // from one vector to the other.
8717 for (unsigned i
= 0; i
< NumHalfWords
; ++i
) {
8718 unsigned MaskShift
= (NumHalfWords
- 1 - i
) * 4;
8719 uint32_t MaskOneElt
= (Mask
>> MaskShift
) & 0xF;
8720 uint32_t MaskOtherElts
= ~(0xF << MaskShift
);
8721 uint32_t TargetOrder
= 0x0;
8723 // If both vector operands for the shuffle are the same vector, the mask
8724 // will contain only elements from the first one and the second one will be
8728 unsigned VINSERTHSrcElem
= IsLE
? 4 : 3;
8729 TargetOrder
= OriginalOrderLow
;
8731 // Skip if not the correct element or mask of other elements don't equal
8732 // to our expected order.
8733 if (MaskOneElt
== VINSERTHSrcElem
&&
8734 (Mask
& MaskOtherElts
) == (TargetOrder
& MaskOtherElts
)) {
8735 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) * 2 : i
* 2;
8736 FoundCandidate
= true;
8739 } else { // If both operands are defined.
8740 // Target order is [8,15] if the current mask is between [0,7].
8742 (MaskOneElt
< NumHalfWords
) ? OriginalOrderHigh
: OriginalOrderLow
;
8743 // Skip if mask of other elements don't equal our expected order.
8744 if ((Mask
& MaskOtherElts
) == (TargetOrder
& MaskOtherElts
)) {
8745 // We only need the last 3 bits for the number of shifts.
8746 ShiftElts
= IsLE
? LittleEndianShifts
[MaskOneElt
& 0x7]
8747 : BigEndianShifts
[MaskOneElt
& 0x7];
8748 InsertAtByte
= IsLE
? BytesInVector
- (i
+ 1) * 2 : i
* 2;
8749 Swap
= MaskOneElt
< NumHalfWords
;
8750 FoundCandidate
= true;
8756 if (!FoundCandidate
)
8759 // Candidate found, construct the proper SDAG sequence with VINSERTH,
8760 // optionally with VECSHL if shift is required.
8765 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V1
);
8767 // Double ShiftElts because we're left shifting on v16i8 type.
8768 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v16i8
, V2
, V2
,
8769 DAG
.getConstant(2 * ShiftElts
, dl
, MVT::i32
));
8770 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, Shl
);
8771 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v8i16
, Conv1
, Conv2
,
8772 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8773 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8775 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V2
);
8776 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v8i16
, Conv1
, Conv2
,
8777 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8778 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8781 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
8782 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
8783 /// return the code it can be lowered into. Worst case, it can always be
8784 /// lowered into a vperm.
8785 SDValue
PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op
,
8786 SelectionDAG
&DAG
) const {
8788 SDValue V1
= Op
.getOperand(0);
8789 SDValue V2
= Op
.getOperand(1);
8790 ShuffleVectorSDNode
*SVOp
= cast
<ShuffleVectorSDNode
>(Op
);
8791 EVT VT
= Op
.getValueType();
8792 bool isLittleEndian
= Subtarget
.isLittleEndian();
8794 unsigned ShiftElts
, InsertAtByte
;
8796 if (Subtarget
.hasP9Vector() &&
8797 PPC::isXXINSERTWMask(SVOp
, ShiftElts
, InsertAtByte
, Swap
,
8801 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8802 SDValue Conv2
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V2
);
8804 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v4i32
, Conv2
, Conv2
,
8805 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8806 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v4i32
, Conv1
, Shl
,
8807 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8808 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8810 SDValue Ins
= DAG
.getNode(PPCISD::VECINSERT
, dl
, MVT::v4i32
, Conv1
, Conv2
,
8811 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
8812 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Ins
);
8815 if (Subtarget
.hasP9Altivec()) {
8817 if ((NewISDNode
= lowerToVINSERTH(SVOp
, DAG
)))
8820 if ((NewISDNode
= lowerToVINSERTB(SVOp
, DAG
)))
8824 if (Subtarget
.hasVSX() &&
8825 PPC::isXXSLDWIShuffleMask(SVOp
, ShiftElts
, Swap
, isLittleEndian
)) {
8828 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8830 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V2
.isUndef() ? V1
: V2
);
8832 SDValue Shl
= DAG
.getNode(PPCISD::VECSHL
, dl
, MVT::v4i32
, Conv1
, Conv2
,
8833 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8834 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Shl
);
8837 if (Subtarget
.hasVSX() &&
8838 PPC::isXXPERMDIShuffleMask(SVOp
, ShiftElts
, Swap
, isLittleEndian
)) {
8841 SDValue Conv1
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V1
);
8843 DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V2
.isUndef() ? V1
: V2
);
8845 SDValue PermDI
= DAG
.getNode(PPCISD::XXPERMDI
, dl
, MVT::v2i64
, Conv1
, Conv2
,
8846 DAG
.getConstant(ShiftElts
, dl
, MVT::i32
));
8847 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, PermDI
);
8850 if (Subtarget
.hasP9Vector()) {
8851 if (PPC::isXXBRHShuffleMask(SVOp
)) {
8852 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, V1
);
8853 SDValue ReveHWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v8i16
, Conv
);
8854 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveHWord
);
8855 } else if (PPC::isXXBRWShuffleMask(SVOp
)) {
8856 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8857 SDValue ReveWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v4i32
, Conv
);
8858 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveWord
);
8859 } else if (PPC::isXXBRDShuffleMask(SVOp
)) {
8860 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2i64
, V1
);
8861 SDValue ReveDWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v2i64
, Conv
);
8862 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveDWord
);
8863 } else if (PPC::isXXBRQShuffleMask(SVOp
)) {
8864 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v1i128
, V1
);
8865 SDValue ReveQWord
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v1i128
, Conv
);
8866 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, ReveQWord
);
8870 if (Subtarget
.hasVSX()) {
8871 if (V2
.isUndef() && PPC::isSplatShuffleMask(SVOp
, 4)) {
8872 int SplatIdx
= PPC::getVSPLTImmediate(SVOp
, 4, DAG
);
8874 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v4i32
, V1
);
8875 SDValue Splat
= DAG
.getNode(PPCISD::XXSPLT
, dl
, MVT::v4i32
, Conv
,
8876 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8877 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Splat
);
8880 // Left shifts of 8 bytes are actually swaps. Convert accordingly.
8881 if (V2
.isUndef() && PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) == 8) {
8882 SDValue Conv
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2f64
, V1
);
8883 SDValue Swap
= DAG
.getNode(PPCISD::SWAP_NO_CHAIN
, dl
, MVT::v2f64
, Conv
);
8884 return DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, Swap
);
8888 if (Subtarget
.hasQPX()) {
8889 if (VT
.getVectorNumElements() != 4)
8892 if (V2
.isUndef()) V2
= V1
;
8894 int AlignIdx
= PPC::isQVALIGNIShuffleMask(SVOp
);
8895 if (AlignIdx
!= -1) {
8896 return DAG
.getNode(PPCISD::QVALIGNI
, dl
, VT
, V1
, V2
,
8897 DAG
.getConstant(AlignIdx
, dl
, MVT::i32
));
8898 } else if (SVOp
->isSplat()) {
8899 int SplatIdx
= SVOp
->getSplatIndex();
8900 if (SplatIdx
>= 4) {
8905 return DAG
.getNode(PPCISD::QVESPLATI
, dl
, VT
, V1
,
8906 DAG
.getConstant(SplatIdx
, dl
, MVT::i32
));
8909 // Lower this into a qvgpci/qvfperm pair.
8911 // Compute the qvgpci literal
8913 for (unsigned i
= 0; i
< 4; ++i
) {
8914 int m
= SVOp
->getMaskElt(i
);
8915 unsigned mm
= m
>= 0 ? (unsigned) m
: i
;
8916 idx
|= mm
<< (3-i
)*3;
8919 SDValue V3
= DAG
.getNode(PPCISD::QVGPCI
, dl
, MVT::v4f64
,
8920 DAG
.getConstant(idx
, dl
, MVT::i32
));
8921 return DAG
.getNode(PPCISD::QVFPERM
, dl
, VT
, V1
, V2
, V3
);
8924 // Cases that are handled by instructions that take permute immediates
8925 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
8926 // selected by the instruction selector.
8928 if (PPC::isSplatShuffleMask(SVOp
, 1) ||
8929 PPC::isSplatShuffleMask(SVOp
, 2) ||
8930 PPC::isSplatShuffleMask(SVOp
, 4) ||
8931 PPC::isVPKUWUMShuffleMask(SVOp
, 1, DAG
) ||
8932 PPC::isVPKUHUMShuffleMask(SVOp
, 1, DAG
) ||
8933 PPC::isVSLDOIShuffleMask(SVOp
, 1, DAG
) != -1 ||
8934 PPC::isVMRGLShuffleMask(SVOp
, 1, 1, DAG
) ||
8935 PPC::isVMRGLShuffleMask(SVOp
, 2, 1, DAG
) ||
8936 PPC::isVMRGLShuffleMask(SVOp
, 4, 1, DAG
) ||
8937 PPC::isVMRGHShuffleMask(SVOp
, 1, 1, DAG
) ||
8938 PPC::isVMRGHShuffleMask(SVOp
, 2, 1, DAG
) ||
8939 PPC::isVMRGHShuffleMask(SVOp
, 4, 1, DAG
) ||
8940 (Subtarget
.hasP8Altivec() && (
8941 PPC::isVPKUDUMShuffleMask(SVOp
, 1, DAG
) ||
8942 PPC::isVMRGEOShuffleMask(SVOp
, true, 1, DAG
) ||
8943 PPC::isVMRGEOShuffleMask(SVOp
, false, 1, DAG
)))) {
8948 // Altivec has a variety of "shuffle immediates" that take two vector inputs
8949 // and produce a fixed permutation. If any of these match, do not lower to
8951 unsigned int ShuffleKind
= isLittleEndian
? 2 : 0;
8952 if (PPC::isVPKUWUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8953 PPC::isVPKUHUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8954 PPC::isVSLDOIShuffleMask(SVOp
, ShuffleKind
, DAG
) != -1 ||
8955 PPC::isVMRGLShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
8956 PPC::isVMRGLShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
8957 PPC::isVMRGLShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
8958 PPC::isVMRGHShuffleMask(SVOp
, 1, ShuffleKind
, DAG
) ||
8959 PPC::isVMRGHShuffleMask(SVOp
, 2, ShuffleKind
, DAG
) ||
8960 PPC::isVMRGHShuffleMask(SVOp
, 4, ShuffleKind
, DAG
) ||
8961 (Subtarget
.hasP8Altivec() && (
8962 PPC::isVPKUDUMShuffleMask(SVOp
, ShuffleKind
, DAG
) ||
8963 PPC::isVMRGEOShuffleMask(SVOp
, true, ShuffleKind
, DAG
) ||
8964 PPC::isVMRGEOShuffleMask(SVOp
, false, ShuffleKind
, DAG
))))
8967 // Check to see if this is a shuffle of 4-byte values. If so, we can use our
8968 // perfect shuffle table to emit an optimal matching sequence.
8969 ArrayRef
<int> PermMask
= SVOp
->getMask();
8971 unsigned PFIndexes
[4];
8972 bool isFourElementShuffle
= true;
8973 for (unsigned i
= 0; i
!= 4 && isFourElementShuffle
; ++i
) { // Element number
8974 unsigned EltNo
= 8; // Start out undef.
8975 for (unsigned j
= 0; j
!= 4; ++j
) { // Intra-element byte.
8976 if (PermMask
[i
*4+j
] < 0)
8977 continue; // Undef, ignore it.
8979 unsigned ByteSource
= PermMask
[i
*4+j
];
8980 if ((ByteSource
& 3) != j
) {
8981 isFourElementShuffle
= false;
8986 EltNo
= ByteSource
/4;
8987 } else if (EltNo
!= ByteSource
/4) {
8988 isFourElementShuffle
= false;
8992 PFIndexes
[i
] = EltNo
;
8995 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
8996 // perfect shuffle vector to determine if it is cost effective to do this as
8997 // discrete instructions, or whether we should use a vperm.
8998 // For now, we skip this for little endian until such time as we have a
8999 // little-endian perfect shuffle table.
9000 if (isFourElementShuffle
&& !isLittleEndian
) {
9001 // Compute the index in the perfect shuffle table.
9002 unsigned PFTableIndex
=
9003 PFIndexes
[0]*9*9*9+PFIndexes
[1]*9*9+PFIndexes
[2]*9+PFIndexes
[3];
9005 unsigned PFEntry
= PerfectShuffleTable
[PFTableIndex
];
9006 unsigned Cost
= (PFEntry
>> 30);
9008 // Determining when to avoid vperm is tricky. Many things affect the cost
9009 // of vperm, particularly how many times the perm mask needs to be computed.
9010 // For example, if the perm mask can be hoisted out of a loop or is already
9011 // used (perhaps because there are multiple permutes with the same shuffle
9012 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
9013 // the loop requires an extra register.
9015 // As a compromise, we only emit discrete instructions if the shuffle can be
9016 // generated in 3 or fewer operations. When we have loop information
9017 // available, if this block is within a loop, we should avoid using vperm
9018 // for 3-operation perms and use a constant pool load instead.
9020 return GeneratePerfectShuffle(PFEntry
, V1
, V2
, DAG
, dl
);
9023 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
9024 // vector that will get spilled to the constant pool.
9025 if (V2
.isUndef()) V2
= V1
;
9027 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
9028 // that it is in input element units, not in bytes. Convert now.
9030 // For little endian, the order of the input vectors is reversed, and
9031 // the permutation mask is complemented with respect to 31. This is
9032 // necessary to produce proper semantics with the big-endian-biased vperm
9034 EVT EltVT
= V1
.getValueType().getVectorElementType();
9035 unsigned BytesPerElement
= EltVT
.getSizeInBits()/8;
9037 SmallVector
<SDValue
, 16> ResultMask
;
9038 for (unsigned i
= 0, e
= VT
.getVectorNumElements(); i
!= e
; ++i
) {
9039 unsigned SrcElt
= PermMask
[i
] < 0 ? 0 : PermMask
[i
];
9041 for (unsigned j
= 0; j
!= BytesPerElement
; ++j
)
9043 ResultMask
.push_back(DAG
.getConstant(31 - (SrcElt
*BytesPerElement
+ j
),
9046 ResultMask
.push_back(DAG
.getConstant(SrcElt
*BytesPerElement
+ j
, dl
,
9050 SDValue VPermMask
= DAG
.getBuildVector(MVT::v16i8
, dl
, ResultMask
);
9052 return DAG
.getNode(PPCISD::VPERM
, dl
, V1
.getValueType(),
9055 return DAG
.getNode(PPCISD::VPERM
, dl
, V1
.getValueType(),
9059 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
9060 /// vector comparison. If it is, return true and fill in Opc/isDot with
9061 /// information about the intrinsic.
9062 static bool getVectorCompareInfo(SDValue Intrin
, int &CompareOpc
,
9063 bool &isDot
, const PPCSubtarget
&Subtarget
) {
9064 unsigned IntrinsicID
=
9065 cast
<ConstantSDNode
>(Intrin
.getOperand(0))->getZExtValue();
9068 switch (IntrinsicID
) {
9071 // Comparison predicates.
9072 case Intrinsic::ppc_altivec_vcmpbfp_p
:
9076 case Intrinsic::ppc_altivec_vcmpeqfp_p
:
9080 case Intrinsic::ppc_altivec_vcmpequb_p
:
9084 case Intrinsic::ppc_altivec_vcmpequh_p
:
9088 case Intrinsic::ppc_altivec_vcmpequw_p
:
9092 case Intrinsic::ppc_altivec_vcmpequd_p
:
9093 if (Subtarget
.hasP8Altivec()) {
9099 case Intrinsic::ppc_altivec_vcmpneb_p
:
9100 case Intrinsic::ppc_altivec_vcmpneh_p
:
9101 case Intrinsic::ppc_altivec_vcmpnew_p
:
9102 case Intrinsic::ppc_altivec_vcmpnezb_p
:
9103 case Intrinsic::ppc_altivec_vcmpnezh_p
:
9104 case Intrinsic::ppc_altivec_vcmpnezw_p
:
9105 if (Subtarget
.hasP9Altivec()) {
9106 switch (IntrinsicID
) {
9108 llvm_unreachable("Unknown comparison intrinsic.");
9109 case Intrinsic::ppc_altivec_vcmpneb_p
:
9112 case Intrinsic::ppc_altivec_vcmpneh_p
:
9115 case Intrinsic::ppc_altivec_vcmpnew_p
:
9118 case Intrinsic::ppc_altivec_vcmpnezb_p
:
9121 case Intrinsic::ppc_altivec_vcmpnezh_p
:
9124 case Intrinsic::ppc_altivec_vcmpnezw_p
:
9132 case Intrinsic::ppc_altivec_vcmpgefp_p
:
9136 case Intrinsic::ppc_altivec_vcmpgtfp_p
:
9140 case Intrinsic::ppc_altivec_vcmpgtsb_p
:
9144 case Intrinsic::ppc_altivec_vcmpgtsh_p
:
9148 case Intrinsic::ppc_altivec_vcmpgtsw_p
:
9152 case Intrinsic::ppc_altivec_vcmpgtsd_p
:
9153 if (Subtarget
.hasP8Altivec()) {
9159 case Intrinsic::ppc_altivec_vcmpgtub_p
:
9163 case Intrinsic::ppc_altivec_vcmpgtuh_p
:
9167 case Intrinsic::ppc_altivec_vcmpgtuw_p
:
9171 case Intrinsic::ppc_altivec_vcmpgtud_p
:
9172 if (Subtarget
.hasP8Altivec()) {
9179 // VSX predicate comparisons use the same infrastructure
9180 case Intrinsic::ppc_vsx_xvcmpeqdp_p
:
9181 case Intrinsic::ppc_vsx_xvcmpgedp_p
:
9182 case Intrinsic::ppc_vsx_xvcmpgtdp_p
:
9183 case Intrinsic::ppc_vsx_xvcmpeqsp_p
:
9184 case Intrinsic::ppc_vsx_xvcmpgesp_p
:
9185 case Intrinsic::ppc_vsx_xvcmpgtsp_p
:
9186 if (Subtarget
.hasVSX()) {
9187 switch (IntrinsicID
) {
9188 case Intrinsic::ppc_vsx_xvcmpeqdp_p
:
9191 case Intrinsic::ppc_vsx_xvcmpgedp_p
:
9194 case Intrinsic::ppc_vsx_xvcmpgtdp_p
:
9197 case Intrinsic::ppc_vsx_xvcmpeqsp_p
:
9200 case Intrinsic::ppc_vsx_xvcmpgesp_p
:
9203 case Intrinsic::ppc_vsx_xvcmpgtsp_p
:
9212 // Normal Comparisons.
9213 case Intrinsic::ppc_altivec_vcmpbfp
:
9216 case Intrinsic::ppc_altivec_vcmpeqfp
:
9219 case Intrinsic::ppc_altivec_vcmpequb
:
9222 case Intrinsic::ppc_altivec_vcmpequh
:
9225 case Intrinsic::ppc_altivec_vcmpequw
:
9228 case Intrinsic::ppc_altivec_vcmpequd
:
9229 if (Subtarget
.hasP8Altivec())
9234 case Intrinsic::ppc_altivec_vcmpneb
:
9235 case Intrinsic::ppc_altivec_vcmpneh
:
9236 case Intrinsic::ppc_altivec_vcmpnew
:
9237 case Intrinsic::ppc_altivec_vcmpnezb
:
9238 case Intrinsic::ppc_altivec_vcmpnezh
:
9239 case Intrinsic::ppc_altivec_vcmpnezw
:
9240 if (Subtarget
.hasP9Altivec())
9241 switch (IntrinsicID
) {
9243 llvm_unreachable("Unknown comparison intrinsic.");
9244 case Intrinsic::ppc_altivec_vcmpneb
:
9247 case Intrinsic::ppc_altivec_vcmpneh
:
9250 case Intrinsic::ppc_altivec_vcmpnew
:
9253 case Intrinsic::ppc_altivec_vcmpnezb
:
9256 case Intrinsic::ppc_altivec_vcmpnezh
:
9259 case Intrinsic::ppc_altivec_vcmpnezw
:
9266 case Intrinsic::ppc_altivec_vcmpgefp
:
9269 case Intrinsic::ppc_altivec_vcmpgtfp
:
9272 case Intrinsic::ppc_altivec_vcmpgtsb
:
9275 case Intrinsic::ppc_altivec_vcmpgtsh
:
9278 case Intrinsic::ppc_altivec_vcmpgtsw
:
9281 case Intrinsic::ppc_altivec_vcmpgtsd
:
9282 if (Subtarget
.hasP8Altivec())
9287 case Intrinsic::ppc_altivec_vcmpgtub
:
9290 case Intrinsic::ppc_altivec_vcmpgtuh
:
9293 case Intrinsic::ppc_altivec_vcmpgtuw
:
9296 case Intrinsic::ppc_altivec_vcmpgtud
:
9297 if (Subtarget
.hasP8Altivec())
9306 /// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
9307 /// lower, do it, otherwise return null.
9308 SDValue
PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op
,
9309 SelectionDAG
&DAG
) const {
9310 unsigned IntrinsicID
=
9311 cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
9315 if (IntrinsicID
== Intrinsic::thread_pointer
) {
9316 // Reads the thread pointer register, used for __builtin_thread_pointer.
9317 if (Subtarget
.isPPC64())
9318 return DAG
.getRegister(PPC::X13
, MVT::i64
);
9319 return DAG
.getRegister(PPC::R2
, MVT::i32
);
9322 // If this is a lowered altivec predicate compare, CompareOpc is set to the
9323 // opcode number of the comparison.
9326 if (!getVectorCompareInfo(Op
, CompareOpc
, isDot
, Subtarget
))
9327 return SDValue(); // Don't custom lower most intrinsics.
9329 // If this is a non-dot comparison, make the VCMP node and we are done.
9331 SDValue Tmp
= DAG
.getNode(PPCISD::VCMP
, dl
, Op
.getOperand(2).getValueType(),
9332 Op
.getOperand(1), Op
.getOperand(2),
9333 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
));
9334 return DAG
.getNode(ISD::BITCAST
, dl
, Op
.getValueType(), Tmp
);
9337 // Create the PPCISD altivec 'dot' comparison node.
9339 Op
.getOperand(2), // LHS
9340 Op
.getOperand(3), // RHS
9341 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
)
9343 EVT VTs
[] = { Op
.getOperand(2).getValueType(), MVT::Glue
};
9344 SDValue CompNode
= DAG
.getNode(PPCISD::VCMPo
, dl
, VTs
, Ops
);
9346 // Now that we have the comparison, emit a copy from the CR to a GPR.
9347 // This is flagged to the above dot comparison.
9348 SDValue Flags
= DAG
.getNode(PPCISD::MFOCRF
, dl
, MVT::i32
,
9349 DAG
.getRegister(PPC::CR6
, MVT::i32
),
9350 CompNode
.getValue(1));
9352 // Unpack the result based on how the target uses it.
9353 unsigned BitNo
; // Bit # of CR6.
9354 bool InvertBit
; // Invert result?
9355 switch (cast
<ConstantSDNode
>(Op
.getOperand(1))->getZExtValue()) {
9356 default: // Can't happen, don't crash on invalid number though.
9357 case 0: // Return the value of the EQ bit of CR6.
9358 BitNo
= 0; InvertBit
= false;
9360 case 1: // Return the inverted value of the EQ bit of CR6.
9361 BitNo
= 0; InvertBit
= true;
9363 case 2: // Return the value of the LT bit of CR6.
9364 BitNo
= 2; InvertBit
= false;
9366 case 3: // Return the inverted value of the LT bit of CR6.
9367 BitNo
= 2; InvertBit
= true;
9371 // Shift the bit into the low position.
9372 Flags
= DAG
.getNode(ISD::SRL
, dl
, MVT::i32
, Flags
,
9373 DAG
.getConstant(8 - (3 - BitNo
), dl
, MVT::i32
));
9375 Flags
= DAG
.getNode(ISD::AND
, dl
, MVT::i32
, Flags
,
9376 DAG
.getConstant(1, dl
, MVT::i32
));
9378 // If we are supposed to, toggle the bit.
9380 Flags
= DAG
.getNode(ISD::XOR
, dl
, MVT::i32
, Flags
,
9381 DAG
.getConstant(1, dl
, MVT::i32
));
9385 SDValue
PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op
,
9386 SelectionDAG
&DAG
) const {
9387 // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
9388 // the beginning of the argument list.
9389 int ArgStart
= isa
<ConstantSDNode
>(Op
.getOperand(0)) ? 0 : 1;
9391 switch (cast
<ConstantSDNode
>(Op
.getOperand(ArgStart
))->getZExtValue()) {
9392 case Intrinsic::ppc_cfence
: {
9393 assert(ArgStart
== 1 && "llvm.ppc.cfence must carry a chain argument.");
9394 assert(Subtarget
.isPPC64() && "Only 64-bit is supported for now.");
9395 return SDValue(DAG
.getMachineNode(PPC::CFENCE8
, DL
, MVT::Other
,
9396 DAG
.getNode(ISD::ANY_EXTEND
, DL
, MVT::i64
,
9397 Op
.getOperand(ArgStart
+ 1)),
9407 SDValue
PPCTargetLowering::LowerREM(SDValue Op
, SelectionDAG
&DAG
) const {
9408 // Check for a DIV with the same operands as this REM.
9409 for (auto UI
: Op
.getOperand(1)->uses()) {
9410 if ((Op
.getOpcode() == ISD::SREM
&& UI
->getOpcode() == ISD::SDIV
) ||
9411 (Op
.getOpcode() == ISD::UREM
&& UI
->getOpcode() == ISD::UDIV
))
9412 if (UI
->getOperand(0) == Op
.getOperand(0) &&
9413 UI
->getOperand(1) == Op
.getOperand(1))
9419 // Lower scalar BSWAP64 to xxbrd.
9420 SDValue
PPCTargetLowering::LowerBSWAP(SDValue Op
, SelectionDAG
&DAG
) const {
9423 Op
= DAG
.getNode(ISD::BUILD_VECTOR
, dl
, MVT::v2i64
, Op
.getOperand(0),
9426 Op
= DAG
.getNode(PPCISD::XXREVERSE
, dl
, MVT::v2i64
, Op
);
9428 int VectorIndex
= 0;
9429 if (Subtarget
.isLittleEndian())
9431 Op
= DAG
.getNode(ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::i64
, Op
,
9432 DAG
.getTargetConstant(VectorIndex
, dl
, MVT::i32
));
9436 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
9437 // compared to a value that is atomically loaded (atomic loads zero-extend).
9438 SDValue
PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op
,
9439 SelectionDAG
&DAG
) const {
9440 assert(Op
.getOpcode() == ISD::ATOMIC_CMP_SWAP
&&
9441 "Expecting an atomic compare-and-swap here.");
9443 auto *AtomicNode
= cast
<AtomicSDNode
>(Op
.getNode());
9444 EVT MemVT
= AtomicNode
->getMemoryVT();
9445 if (MemVT
.getSizeInBits() >= 32)
9448 SDValue CmpOp
= Op
.getOperand(2);
9449 // If this is already correctly zero-extended, leave it alone.
9450 auto HighBits
= APInt::getHighBitsSet(32, 32 - MemVT
.getSizeInBits());
9451 if (DAG
.MaskedValueIsZero(CmpOp
, HighBits
))
9454 // Clear the high bits of the compare operand.
9455 unsigned MaskVal
= (1 << MemVT
.getSizeInBits()) - 1;
9457 DAG
.getNode(ISD::AND
, dl
, MVT::i32
, CmpOp
,
9458 DAG
.getConstant(MaskVal
, dl
, MVT::i32
));
9460 // Replace the existing compare operand with the properly zero-extended one.
9461 SmallVector
<SDValue
, 4> Ops
;
9462 for (int i
= 0, e
= AtomicNode
->getNumOperands(); i
< e
; i
++)
9463 Ops
.push_back(AtomicNode
->getOperand(i
));
9465 MachineMemOperand
*MMO
= AtomicNode
->getMemOperand();
9466 SDVTList Tys
= DAG
.getVTList(MVT::i32
, MVT::Other
);
9468 (MemVT
== MVT::i8
) ? PPCISD::ATOMIC_CMP_SWAP_8
: PPCISD::ATOMIC_CMP_SWAP_16
;
9469 return DAG
.getMemIntrinsicNode(NodeTy
, dl
, Tys
, Ops
, MemVT
, MMO
);
9472 SDValue
PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op
,
9473 SelectionDAG
&DAG
) const {
9475 // Create a stack slot that is 16-byte aligned.
9476 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9477 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9478 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9479 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9481 // Store the input value into Value#0 of the stack slot.
9482 SDValue Store
= DAG
.getStore(DAG
.getEntryNode(), dl
, Op
.getOperand(0), FIdx
,
9483 MachinePointerInfo());
9485 return DAG
.getLoad(Op
.getValueType(), dl
, Store
, FIdx
, MachinePointerInfo());
9488 SDValue
PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op
,
9489 SelectionDAG
&DAG
) const {
9490 assert(Op
.getOpcode() == ISD::INSERT_VECTOR_ELT
&&
9491 "Should only be called for ISD::INSERT_VECTOR_ELT");
9493 ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(Op
.getOperand(2));
9494 // We have legal lowering for constant indices but not for variable ones.
9498 EVT VT
= Op
.getValueType();
9500 SDValue V1
= Op
.getOperand(0);
9501 SDValue V2
= Op
.getOperand(1);
9502 // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
9503 if (VT
== MVT::v8i16
|| VT
== MVT::v16i8
) {
9504 SDValue Mtvsrz
= DAG
.getNode(PPCISD::MTVSRZ
, dl
, VT
, V2
);
9505 unsigned BytesInEachElement
= VT
.getVectorElementType().getSizeInBits() / 8;
9506 unsigned InsertAtElement
= C
->getZExtValue();
9507 unsigned InsertAtByte
= InsertAtElement
* BytesInEachElement
;
9508 if (Subtarget
.isLittleEndian()) {
9509 InsertAtByte
= (16 - BytesInEachElement
) - InsertAtByte
;
9511 return DAG
.getNode(PPCISD::VECINSERT
, dl
, VT
, V1
, Mtvsrz
,
9512 DAG
.getConstant(InsertAtByte
, dl
, MVT::i32
));
9517 SDValue
PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op
,
9518 SelectionDAG
&DAG
) const {
9520 SDNode
*N
= Op
.getNode();
9522 assert(N
->getOperand(0).getValueType() == MVT::v4i1
&&
9523 "Unknown extract_vector_elt type");
9525 SDValue Value
= N
->getOperand(0);
9527 // The first part of this is like the store lowering except that we don't
9528 // need to track the chain.
9530 // The values are now known to be -1 (false) or 1 (true). To convert this
9531 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9532 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9533 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
9535 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9536 // understand how to form the extending load.
9537 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
9539 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
9541 // Now convert to an integer and store.
9542 Value
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
9543 DAG
.getConstant(Intrinsic::ppc_qpx_qvfctiwu
, dl
, MVT::i32
),
9546 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9547 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9548 MachinePointerInfo PtrInfo
=
9549 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
9550 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9551 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9553 SDValue StoreChain
= DAG
.getEntryNode();
9554 SDValue Ops
[] = {StoreChain
,
9555 DAG
.getConstant(Intrinsic::ppc_qpx_qvstfiw
, dl
, MVT::i32
),
9557 SDVTList VTs
= DAG
.getVTList(/*chain*/ MVT::Other
);
9559 StoreChain
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
,
9560 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
9562 // Extract the value requested.
9563 unsigned Offset
= 4*cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue();
9564 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
9565 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
9568 DAG
.getLoad(MVT::i32
, dl
, StoreChain
, Idx
, PtrInfo
.getWithOffset(Offset
));
9570 if (!Subtarget
.useCRBits())
9573 return DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, IntVal
);
9576 /// Lowering for QPX v4i1 loads
9577 SDValue
PPCTargetLowering::LowerVectorLoad(SDValue Op
,
9578 SelectionDAG
&DAG
) const {
9580 LoadSDNode
*LN
= cast
<LoadSDNode
>(Op
.getNode());
9581 SDValue LoadChain
= LN
->getChain();
9582 SDValue BasePtr
= LN
->getBasePtr();
9584 if (Op
.getValueType() == MVT::v4f64
||
9585 Op
.getValueType() == MVT::v4f32
) {
9586 EVT MemVT
= LN
->getMemoryVT();
9587 unsigned Alignment
= LN
->getAlignment();
9589 // If this load is properly aligned, then it is legal.
9590 if (Alignment
>= MemVT
.getStoreSize())
9593 EVT ScalarVT
= Op
.getValueType().getScalarType(),
9594 ScalarMemVT
= MemVT
.getScalarType();
9595 unsigned Stride
= ScalarMemVT
.getStoreSize();
9597 SDValue Vals
[4], LoadChains
[4];
9598 for (unsigned Idx
= 0; Idx
< 4; ++Idx
) {
9600 if (ScalarVT
!= ScalarMemVT
)
9601 Load
= DAG
.getExtLoad(LN
->getExtensionType(), dl
, ScalarVT
, LoadChain
,
9603 LN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9604 ScalarMemVT
, MinAlign(Alignment
, Idx
* Stride
),
9605 LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9607 Load
= DAG
.getLoad(ScalarVT
, dl
, LoadChain
, BasePtr
,
9608 LN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9609 MinAlign(Alignment
, Idx
* Stride
),
9610 LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9612 if (Idx
== 0 && LN
->isIndexed()) {
9613 assert(LN
->getAddressingMode() == ISD::PRE_INC
&&
9614 "Unknown addressing mode on vector load");
9615 Load
= DAG
.getIndexedLoad(Load
, dl
, BasePtr
, LN
->getOffset(),
9616 LN
->getAddressingMode());
9620 LoadChains
[Idx
] = Load
.getValue(1);
9622 BasePtr
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
9623 DAG
.getConstant(Stride
, dl
,
9624 BasePtr
.getValueType()));
9627 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, LoadChains
);
9628 SDValue Value
= DAG
.getBuildVector(Op
.getValueType(), dl
, Vals
);
9630 if (LN
->isIndexed()) {
9631 SDValue RetOps
[] = { Value
, Vals
[0].getValue(1), TF
};
9632 return DAG
.getMergeValues(RetOps
, dl
);
9635 SDValue RetOps
[] = { Value
, TF
};
9636 return DAG
.getMergeValues(RetOps
, dl
);
9639 assert(Op
.getValueType() == MVT::v4i1
&& "Unknown load to lower");
9640 assert(LN
->isUnindexed() && "Indexed v4i1 loads are not supported");
9642 // To lower v4i1 from a byte array, we load the byte elements of the
9643 // vector and then reuse the BUILD_VECTOR logic.
9645 SDValue VectElmts
[4], VectElmtChains
[4];
9646 for (unsigned i
= 0; i
< 4; ++i
) {
9647 SDValue Idx
= DAG
.getConstant(i
, dl
, BasePtr
.getValueType());
9648 Idx
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
, Idx
);
9650 VectElmts
[i
] = DAG
.getExtLoad(
9651 ISD::EXTLOAD
, dl
, MVT::i32
, LoadChain
, Idx
,
9652 LN
->getPointerInfo().getWithOffset(i
), MVT::i8
,
9653 /* Alignment = */ 1, LN
->getMemOperand()->getFlags(), LN
->getAAInfo());
9654 VectElmtChains
[i
] = VectElmts
[i
].getValue(1);
9657 LoadChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, VectElmtChains
);
9658 SDValue Value
= DAG
.getBuildVector(MVT::v4i1
, dl
, VectElmts
);
9660 SDValue RVals
[] = { Value
, LoadChain
};
9661 return DAG
.getMergeValues(RVals
, dl
);
9664 /// Lowering for QPX v4i1 stores
9665 SDValue
PPCTargetLowering::LowerVectorStore(SDValue Op
,
9666 SelectionDAG
&DAG
) const {
9668 StoreSDNode
*SN
= cast
<StoreSDNode
>(Op
.getNode());
9669 SDValue StoreChain
= SN
->getChain();
9670 SDValue BasePtr
= SN
->getBasePtr();
9671 SDValue Value
= SN
->getValue();
9673 if (Value
.getValueType() == MVT::v4f64
||
9674 Value
.getValueType() == MVT::v4f32
) {
9675 EVT MemVT
= SN
->getMemoryVT();
9676 unsigned Alignment
= SN
->getAlignment();
9678 // If this store is properly aligned, then it is legal.
9679 if (Alignment
>= MemVT
.getStoreSize())
9682 EVT ScalarVT
= Value
.getValueType().getScalarType(),
9683 ScalarMemVT
= MemVT
.getScalarType();
9684 unsigned Stride
= ScalarMemVT
.getStoreSize();
9687 for (unsigned Idx
= 0; Idx
< 4; ++Idx
) {
9688 SDValue Ex
= DAG
.getNode(
9689 ISD::EXTRACT_VECTOR_ELT
, dl
, ScalarVT
, Value
,
9690 DAG
.getConstant(Idx
, dl
, getVectorIdxTy(DAG
.getDataLayout())));
9692 if (ScalarVT
!= ScalarMemVT
)
9694 DAG
.getTruncStore(StoreChain
, dl
, Ex
, BasePtr
,
9695 SN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9696 ScalarMemVT
, MinAlign(Alignment
, Idx
* Stride
),
9697 SN
->getMemOperand()->getFlags(), SN
->getAAInfo());
9699 Store
= DAG
.getStore(StoreChain
, dl
, Ex
, BasePtr
,
9700 SN
->getPointerInfo().getWithOffset(Idx
* Stride
),
9701 MinAlign(Alignment
, Idx
* Stride
),
9702 SN
->getMemOperand()->getFlags(), SN
->getAAInfo());
9704 if (Idx
== 0 && SN
->isIndexed()) {
9705 assert(SN
->getAddressingMode() == ISD::PRE_INC
&&
9706 "Unknown addressing mode on vector store");
9707 Store
= DAG
.getIndexedStore(Store
, dl
, BasePtr
, SN
->getOffset(),
9708 SN
->getAddressingMode());
9711 BasePtr
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
9712 DAG
.getConstant(Stride
, dl
,
9713 BasePtr
.getValueType()));
9714 Stores
[Idx
] = Store
;
9717 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
9719 if (SN
->isIndexed()) {
9720 SDValue RetOps
[] = { TF
, Stores
[0].getValue(1) };
9721 return DAG
.getMergeValues(RetOps
, dl
);
9727 assert(SN
->isUnindexed() && "Indexed v4i1 stores are not supported");
9728 assert(Value
.getValueType() == MVT::v4i1
&& "Unknown store to lower");
9730 // The values are now known to be -1 (false) or 1 (true). To convert this
9731 // into 0 (false) and 1 (true), add 1 and then divide by 2 (multiply by 0.5).
9732 // This can be done with an fma and the 0.5 constant: (V+1.0)*0.5 = 0.5*V+0.5
9733 Value
= DAG
.getNode(PPCISD::QBFLT
, dl
, MVT::v4f64
, Value
);
9735 // FIXME: We can make this an f32 vector, but the BUILD_VECTOR code needs to
9736 // understand how to form the extending load.
9737 SDValue FPHalfs
= DAG
.getConstantFP(0.5, dl
, MVT::v4f64
);
9739 Value
= DAG
.getNode(ISD::FMA
, dl
, MVT::v4f64
, Value
, FPHalfs
, FPHalfs
);
9741 // Now convert to an integer and store.
9742 Value
= DAG
.getNode(ISD::INTRINSIC_WO_CHAIN
, dl
, MVT::v4f64
,
9743 DAG
.getConstant(Intrinsic::ppc_qpx_qvfctiwu
, dl
, MVT::i32
),
9746 MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
9747 int FrameIdx
= MFI
.CreateStackObject(16, 16, false);
9748 MachinePointerInfo PtrInfo
=
9749 MachinePointerInfo::getFixedStack(DAG
.getMachineFunction(), FrameIdx
);
9750 EVT PtrVT
= getPointerTy(DAG
.getDataLayout());
9751 SDValue FIdx
= DAG
.getFrameIndex(FrameIdx
, PtrVT
);
9753 SDValue Ops
[] = {StoreChain
,
9754 DAG
.getConstant(Intrinsic::ppc_qpx_qvstfiw
, dl
, MVT::i32
),
9756 SDVTList VTs
= DAG
.getVTList(/*chain*/ MVT::Other
);
9758 StoreChain
= DAG
.getMemIntrinsicNode(ISD::INTRINSIC_VOID
,
9759 dl
, VTs
, Ops
, MVT::v4i32
, PtrInfo
);
9761 // Move data into the byte array.
9762 SDValue Loads
[4], LoadChains
[4];
9763 for (unsigned i
= 0; i
< 4; ++i
) {
9764 unsigned Offset
= 4*i
;
9765 SDValue Idx
= DAG
.getConstant(Offset
, dl
, FIdx
.getValueType());
9766 Idx
= DAG
.getNode(ISD::ADD
, dl
, FIdx
.getValueType(), FIdx
, Idx
);
9768 Loads
[i
] = DAG
.getLoad(MVT::i32
, dl
, StoreChain
, Idx
,
9769 PtrInfo
.getWithOffset(Offset
));
9770 LoadChains
[i
] = Loads
[i
].getValue(1);
9773 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, LoadChains
);
9776 for (unsigned i
= 0; i
< 4; ++i
) {
9777 SDValue Idx
= DAG
.getConstant(i
, dl
, BasePtr
.getValueType());
9778 Idx
= DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
, Idx
);
9780 Stores
[i
] = DAG
.getTruncStore(
9781 StoreChain
, dl
, Loads
[i
], Idx
, SN
->getPointerInfo().getWithOffset(i
),
9782 MVT::i8
, /* Alignment = */ 1, SN
->getMemOperand()->getFlags(),
9786 StoreChain
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
, Stores
);
9791 SDValue
PPCTargetLowering::LowerMUL(SDValue Op
, SelectionDAG
&DAG
) const {
9793 if (Op
.getValueType() == MVT::v4i32
) {
9794 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9796 SDValue Zero
= BuildSplatI( 0, 1, MVT::v4i32
, DAG
, dl
);
9797 SDValue Neg16
= BuildSplatI(-16, 4, MVT::v4i32
, DAG
, dl
);//+16 as shift amt.
9799 SDValue RHSSwap
= // = vrlw RHS, 16
9800 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw
, RHS
, Neg16
, DAG
, dl
);
9802 // Shrinkify inputs to v8i16.
9803 LHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, LHS
);
9804 RHS
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, RHS
);
9805 RHSSwap
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v8i16
, RHSSwap
);
9807 // Low parts multiplied together, generating 32-bit results (we ignore the
9809 SDValue LoProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh
,
9810 LHS
, RHS
, DAG
, dl
, MVT::v4i32
);
9812 SDValue HiProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm
,
9813 LHS
, RHSSwap
, Zero
, DAG
, dl
, MVT::v4i32
);
9814 // Shift the high parts up 16 bits.
9815 HiProd
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw
, HiProd
,
9817 return DAG
.getNode(ISD::ADD
, dl
, MVT::v4i32
, LoProd
, HiProd
);
9818 } else if (Op
.getValueType() == MVT::v8i16
) {
9819 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9821 SDValue Zero
= BuildSplatI(0, 1, MVT::v8i16
, DAG
, dl
);
9823 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm
,
9824 LHS
, RHS
, Zero
, DAG
, dl
);
9825 } else if (Op
.getValueType() == MVT::v16i8
) {
9826 SDValue LHS
= Op
.getOperand(0), RHS
= Op
.getOperand(1);
9827 bool isLittleEndian
= Subtarget
.isLittleEndian();
9829 // Multiply the even 8-bit parts, producing 16-bit sums.
9830 SDValue EvenParts
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub
,
9831 LHS
, RHS
, DAG
, dl
, MVT::v8i16
);
9832 EvenParts
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, EvenParts
);
9834 // Multiply the odd 8-bit parts, producing 16-bit sums.
9835 SDValue OddParts
= BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub
,
9836 LHS
, RHS
, DAG
, dl
, MVT::v8i16
);
9837 OddParts
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v16i8
, OddParts
);
9839 // Merge the results together. Because vmuleub and vmuloub are
9840 // instructions with a big-endian bias, we must reverse the
9841 // element numbering and reverse the meaning of "odd" and "even"
9842 // when generating little endian code.
9844 for (unsigned i
= 0; i
!= 8; ++i
) {
9845 if (isLittleEndian
) {
9847 Ops
[i
*2+1] = 2*i
+16;
9850 Ops
[i
*2+1] = 2*i
+1+16;
9854 return DAG
.getVectorShuffle(MVT::v16i8
, dl
, OddParts
, EvenParts
, Ops
);
9856 return DAG
.getVectorShuffle(MVT::v16i8
, dl
, EvenParts
, OddParts
, Ops
);
9858 llvm_unreachable("Unknown mul to lower!");
9862 SDValue
PPCTargetLowering::LowerABS(SDValue Op
, SelectionDAG
&DAG
) const {
9864 assert(Op
.getOpcode() == ISD::ABS
&& "Should only be called for ISD::ABS");
9866 EVT VT
= Op
.getValueType();
9867 assert(VT
.isVector() &&
9868 "Only set vector abs as custom, scalar abs shouldn't reach here!");
9869 assert((VT
== MVT::v2i64
|| VT
== MVT::v4i32
|| VT
== MVT::v8i16
||
9870 VT
== MVT::v16i8
) &&
9871 "Unexpected vector element type!");
9872 assert((VT
!= MVT::v2i64
|| Subtarget
.hasP8Altivec()) &&
9873 "Current subtarget doesn't support smax v2i64!");
9875 // For vector abs, it can be lowered to:
9882 SDValue X
= Op
.getOperand(0);
9883 SDValue Zero
= DAG
.getConstant(0, dl
, VT
);
9884 SDValue Y
= DAG
.getNode(ISD::SUB
, dl
, VT
, Zero
, X
);
9886 // SMAX patch https://reviews.llvm.org/D47332
9887 // hasn't landed yet, so use intrinsic first here.
9888 // TODO: Should use SMAX directly once SMAX patch landed
9889 Intrinsic::ID BifID
= Intrinsic::ppc_altivec_vmaxsw
;
9890 if (VT
== MVT::v2i64
)
9891 BifID
= Intrinsic::ppc_altivec_vmaxsd
;
9892 else if (VT
== MVT::v8i16
)
9893 BifID
= Intrinsic::ppc_altivec_vmaxsh
;
9894 else if (VT
== MVT::v16i8
)
9895 BifID
= Intrinsic::ppc_altivec_vmaxsb
;
9897 return BuildIntrinsicOp(BifID
, X
, Y
, DAG
, dl
, VT
);
9900 // Custom lowering for fpext vf32 to v2f64
9901 SDValue
PPCTargetLowering::LowerFP_EXTEND(SDValue Op
, SelectionDAG
&DAG
) const {
9903 assert(Op
.getOpcode() == ISD::FP_EXTEND
&&
9904 "Should only be called for ISD::FP_EXTEND");
9906 // We only want to custom lower an extend from v2f32 to v2f64.
9907 if (Op
.getValueType() != MVT::v2f64
||
9908 Op
.getOperand(0).getValueType() != MVT::v2f32
)
9912 SDValue Op0
= Op
.getOperand(0);
9914 switch (Op0
.getOpcode()) {
9921 for (unsigned i
= 0, ie
= Op0
.getNumOperands(); i
!= ie
; ++i
) {
9922 // Ensure both input are loads.
9923 SDValue LdOp
= Op0
.getOperand(i
);
9924 if (LdOp
.getOpcode() != ISD::LOAD
)
9926 // Generate new load node.
9927 LoadSDNode
*LD
= cast
<LoadSDNode
>(LdOp
);
9928 SDValue LoadOps
[] = { LD
->getChain(), LD
->getBasePtr() };
9930 DAG
.getMemIntrinsicNode(PPCISD::LD_VSX_LH
, dl
,
9931 DAG
.getVTList(MVT::v4f32
, MVT::Other
),
9932 LoadOps
, LD
->getMemoryVT(),
9933 LD
->getMemOperand());
9935 SDValue NewOp
= DAG
.getNode(Op0
.getOpcode(), SDLoc(Op0
), MVT::v4f32
,
9936 NewLoad
[0], NewLoad
[1],
9937 Op0
.getNode()->getFlags());
9938 return DAG
.getNode(PPCISD::FP_EXTEND_LH
, dl
, MVT::v2f64
, NewOp
);
9941 LoadSDNode
*LD
= cast
<LoadSDNode
>(Op0
);
9942 SDValue LoadOps
[] = { LD
->getChain(), LD
->getBasePtr() };
9944 DAG
.getMemIntrinsicNode(PPCISD::LD_VSX_LH
, dl
,
9945 DAG
.getVTList(MVT::v4f32
, MVT::Other
),
9946 LoadOps
, LD
->getMemoryVT(), LD
->getMemOperand());
9947 return DAG
.getNode(PPCISD::FP_EXTEND_LH
, dl
, MVT::v2f64
, NewLd
);
9950 llvm_unreachable("ERROR:Should return for all cases within swtich.");
9953 /// LowerOperation - Provide custom lowering hooks for some operations.
9955 SDValue
PPCTargetLowering::LowerOperation(SDValue Op
, SelectionDAG
&DAG
) const {
9956 switch (Op
.getOpcode()) {
9957 default: llvm_unreachable("Wasn't expecting to be able to lower this!");
9958 case ISD::ConstantPool
: return LowerConstantPool(Op
, DAG
);
9959 case ISD::BlockAddress
: return LowerBlockAddress(Op
, DAG
);
9960 case ISD::GlobalAddress
: return LowerGlobalAddress(Op
, DAG
);
9961 case ISD::GlobalTLSAddress
: return LowerGlobalTLSAddress(Op
, DAG
);
9962 case ISD::JumpTable
: return LowerJumpTable(Op
, DAG
);
9963 case ISD::SETCC
: return LowerSETCC(Op
, DAG
);
9964 case ISD::INIT_TRAMPOLINE
: return LowerINIT_TRAMPOLINE(Op
, DAG
);
9965 case ISD::ADJUST_TRAMPOLINE
: return LowerADJUST_TRAMPOLINE(Op
, DAG
);
9967 // Variable argument lowering.
9968 case ISD::VASTART
: return LowerVASTART(Op
, DAG
);
9969 case ISD::VAARG
: return LowerVAARG(Op
, DAG
);
9970 case ISD::VACOPY
: return LowerVACOPY(Op
, DAG
);
9972 case ISD::STACKRESTORE
: return LowerSTACKRESTORE(Op
, DAG
);
9973 case ISD::DYNAMIC_STACKALLOC
: return LowerDYNAMIC_STACKALLOC(Op
, DAG
);
9974 case ISD::GET_DYNAMIC_AREA_OFFSET
:
9975 return LowerGET_DYNAMIC_AREA_OFFSET(Op
, DAG
);
9977 // Exception handling lowering.
9978 case ISD::EH_DWARF_CFA
: return LowerEH_DWARF_CFA(Op
, DAG
);
9979 case ISD::EH_SJLJ_SETJMP
: return lowerEH_SJLJ_SETJMP(Op
, DAG
);
9980 case ISD::EH_SJLJ_LONGJMP
: return lowerEH_SJLJ_LONGJMP(Op
, DAG
);
9982 case ISD::LOAD
: return LowerLOAD(Op
, DAG
);
9983 case ISD::STORE
: return LowerSTORE(Op
, DAG
);
9984 case ISD::TRUNCATE
: return LowerTRUNCATE(Op
, DAG
);
9985 case ISD::SELECT_CC
: return LowerSELECT_CC(Op
, DAG
);
9986 case ISD::FP_TO_UINT
:
9987 case ISD::FP_TO_SINT
: return LowerFP_TO_INT(Op
, DAG
, SDLoc(Op
));
9988 case ISD::UINT_TO_FP
:
9989 case ISD::SINT_TO_FP
: return LowerINT_TO_FP(Op
, DAG
);
9990 case ISD::FLT_ROUNDS_
: return LowerFLT_ROUNDS_(Op
, DAG
);
9992 // Lower 64-bit shifts.
9993 case ISD::SHL_PARTS
: return LowerSHL_PARTS(Op
, DAG
);
9994 case ISD::SRL_PARTS
: return LowerSRL_PARTS(Op
, DAG
);
9995 case ISD::SRA_PARTS
: return LowerSRA_PARTS(Op
, DAG
);
9997 // Vector-related lowering.
9998 case ISD::BUILD_VECTOR
: return LowerBUILD_VECTOR(Op
, DAG
);
9999 case ISD::VECTOR_SHUFFLE
: return LowerVECTOR_SHUFFLE(Op
, DAG
);
10000 case ISD::INTRINSIC_WO_CHAIN
: return LowerINTRINSIC_WO_CHAIN(Op
, DAG
);
10001 case ISD::SCALAR_TO_VECTOR
: return LowerSCALAR_TO_VECTOR(Op
, DAG
);
10002 case ISD::EXTRACT_VECTOR_ELT
: return LowerEXTRACT_VECTOR_ELT(Op
, DAG
);
10003 case ISD::INSERT_VECTOR_ELT
: return LowerINSERT_VECTOR_ELT(Op
, DAG
);
10004 case ISD::MUL
: return LowerMUL(Op
, DAG
);
10005 case ISD::ABS
: return LowerABS(Op
, DAG
);
10006 case ISD::FP_EXTEND
: return LowerFP_EXTEND(Op
, DAG
);
10008 // For counter-based loop handling.
10009 case ISD::INTRINSIC_W_CHAIN
: return SDValue();
10011 case ISD::BITCAST
: return LowerBITCAST(Op
, DAG
);
10013 // Frame & Return address.
10014 case ISD::RETURNADDR
: return LowerRETURNADDR(Op
, DAG
);
10015 case ISD::FRAMEADDR
: return LowerFRAMEADDR(Op
, DAG
);
10017 case ISD::INTRINSIC_VOID
:
10018 return LowerINTRINSIC_VOID(Op
, DAG
);
10021 return LowerREM(Op
, DAG
);
10023 return LowerBSWAP(Op
, DAG
);
10024 case ISD::ATOMIC_CMP_SWAP
:
10025 return LowerATOMIC_CMP_SWAP(Op
, DAG
);
10029 void PPCTargetLowering::ReplaceNodeResults(SDNode
*N
,
10030 SmallVectorImpl
<SDValue
>&Results
,
10031 SelectionDAG
&DAG
) const {
10033 switch (N
->getOpcode()) {
10035 llvm_unreachable("Do not know how to custom type legalize this operation!");
10036 case ISD::READCYCLECOUNTER
: {
10037 SDVTList VTs
= DAG
.getVTList(MVT::i32
, MVT::i32
, MVT::Other
);
10038 SDValue RTB
= DAG
.getNode(PPCISD::READ_TIME_BASE
, dl
, VTs
, N
->getOperand(0));
10040 Results
.push_back(RTB
);
10041 Results
.push_back(RTB
.getValue(1));
10042 Results
.push_back(RTB
.getValue(2));
10045 case ISD::INTRINSIC_W_CHAIN
: {
10046 if (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue() !=
10047 Intrinsic::loop_decrement
)
10050 assert(N
->getValueType(0) == MVT::i1
&&
10051 "Unexpected result type for CTR decrement intrinsic");
10052 EVT SVT
= getSetCCResultType(DAG
.getDataLayout(), *DAG
.getContext(),
10053 N
->getValueType(0));
10054 SDVTList VTs
= DAG
.getVTList(SVT
, MVT::Other
);
10055 SDValue NewInt
= DAG
.getNode(N
->getOpcode(), dl
, VTs
, N
->getOperand(0),
10058 Results
.push_back(DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, NewInt
));
10059 Results
.push_back(NewInt
.getValue(1));
10063 if (!Subtarget
.isSVR4ABI() || Subtarget
.isPPC64())
10066 EVT VT
= N
->getValueType(0);
10068 if (VT
== MVT::i64
) {
10069 SDValue NewNode
= LowerVAARG(SDValue(N
, 1), DAG
);
10071 Results
.push_back(NewNode
);
10072 Results
.push_back(NewNode
.getValue(1));
10076 case ISD::FP_TO_SINT
:
10077 case ISD::FP_TO_UINT
:
10078 // LowerFP_TO_INT() can only handle f32 and f64.
10079 if (N
->getOperand(0).getValueType() == MVT::ppcf128
)
10081 Results
.push_back(LowerFP_TO_INT(SDValue(N
, 0), DAG
, dl
));
10083 case ISD::TRUNCATE
: {
10084 EVT TrgVT
= N
->getValueType(0);
10085 EVT OpVT
= N
->getOperand(0).getValueType();
10086 if (TrgVT
.isVector() &&
10087 isOperationCustom(N
->getOpcode(), TrgVT
) &&
10088 OpVT
.getSizeInBits() <= 128 &&
10089 isPowerOf2_32(OpVT
.getVectorElementType().getSizeInBits()))
10090 Results
.push_back(LowerTRUNCATEVector(SDValue(N
, 0), DAG
));
10094 // Don't handle bitcast here.
10099 //===----------------------------------------------------------------------===//
10100 // Other Lowering Code
10101 //===----------------------------------------------------------------------===//
10103 static Instruction
* callIntrinsic(IRBuilder
<> &Builder
, Intrinsic::ID Id
) {
10104 Module
*M
= Builder
.GetInsertBlock()->getParent()->getParent();
10105 Function
*Func
= Intrinsic::getDeclaration(M
, Id
);
10106 return Builder
.CreateCall(Func
, {});
10109 // The mappings for emitLeading/TrailingFence is taken from
10110 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
10111 Instruction
*PPCTargetLowering::emitLeadingFence(IRBuilder
<> &Builder
,
10113 AtomicOrdering Ord
) const {
10114 if (Ord
== AtomicOrdering::SequentiallyConsistent
)
10115 return callIntrinsic(Builder
, Intrinsic::ppc_sync
);
10116 if (isReleaseOrStronger(Ord
))
10117 return callIntrinsic(Builder
, Intrinsic::ppc_lwsync
);
10121 Instruction
*PPCTargetLowering::emitTrailingFence(IRBuilder
<> &Builder
,
10123 AtomicOrdering Ord
) const {
10124 if (Inst
->hasAtomicLoad() && isAcquireOrStronger(Ord
)) {
10125 // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
10126 // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
10127 // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
10128 if (isa
<LoadInst
>(Inst
) && Subtarget
.isPPC64())
10129 return Builder
.CreateCall(
10130 Intrinsic::getDeclaration(
10131 Builder
.GetInsertBlock()->getParent()->getParent(),
10132 Intrinsic::ppc_cfence
, {Inst
->getType()}),
10134 // FIXME: Can use isync for rmw operation.
10135 return callIntrinsic(Builder
, Intrinsic::ppc_lwsync
);
10140 MachineBasicBlock
*
10141 PPCTargetLowering::EmitAtomicBinary(MachineInstr
&MI
, MachineBasicBlock
*BB
,
10142 unsigned AtomicSize
,
10143 unsigned BinOpcode
,
10144 unsigned CmpOpcode
,
10145 unsigned CmpPred
) const {
10146 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10147 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10149 auto LoadMnemonic
= PPC::LDARX
;
10150 auto StoreMnemonic
= PPC::STDCX
;
10151 switch (AtomicSize
) {
10153 llvm_unreachable("Unexpected size of atomic entity");
10155 LoadMnemonic
= PPC::LBARX
;
10156 StoreMnemonic
= PPC::STBCX
;
10157 assert(Subtarget
.hasPartwordAtomics() && "Call this only with size >=4");
10160 LoadMnemonic
= PPC::LHARX
;
10161 StoreMnemonic
= PPC::STHCX
;
10162 assert(Subtarget
.hasPartwordAtomics() && "Call this only with size >=4");
10165 LoadMnemonic
= PPC::LWARX
;
10166 StoreMnemonic
= PPC::STWCX
;
10169 LoadMnemonic
= PPC::LDARX
;
10170 StoreMnemonic
= PPC::STDCX
;
10174 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10175 MachineFunction
*F
= BB
->getParent();
10176 MachineFunction::iterator It
= ++BB
->getIterator();
10178 Register dest
= MI
.getOperand(0).getReg();
10179 Register ptrA
= MI
.getOperand(1).getReg();
10180 Register ptrB
= MI
.getOperand(2).getReg();
10181 Register incr
= MI
.getOperand(3).getReg();
10182 DebugLoc dl
= MI
.getDebugLoc();
10184 MachineBasicBlock
*loopMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10185 MachineBasicBlock
*loop2MBB
=
10186 CmpOpcode
? F
->CreateMachineBasicBlock(LLVM_BB
) : nullptr;
10187 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10188 F
->insert(It
, loopMBB
);
10190 F
->insert(It
, loop2MBB
);
10191 F
->insert(It
, exitMBB
);
10192 exitMBB
->splice(exitMBB
->begin(), BB
,
10193 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10194 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10196 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10197 Register TmpReg
= (!BinOpcode
) ? incr
:
10198 RegInfo
.createVirtualRegister( AtomicSize
== 8 ? &PPC::G8RCRegClass
10199 : &PPC::GPRCRegClass
);
10203 // fallthrough --> loopMBB
10204 BB
->addSuccessor(loopMBB
);
10207 // l[wd]arx dest, ptr
10208 // add r0, dest, incr
10209 // st[wd]cx. r0, ptr
10211 // fallthrough --> exitMBB
10215 // l[wd]arx dest, ptr
10216 // cmpl?[wd] incr, dest
10219 // st[wd]cx. dest, ptr
10221 // fallthrough --> exitMBB
10224 BuildMI(BB
, dl
, TII
->get(LoadMnemonic
), dest
)
10225 .addReg(ptrA
).addReg(ptrB
);
10227 BuildMI(BB
, dl
, TII
->get(BinOpcode
), TmpReg
).addReg(incr
).addReg(dest
);
10229 // Signed comparisons of byte or halfword values must be sign-extended.
10230 if (CmpOpcode
== PPC::CMPW
&& AtomicSize
< 4) {
10231 Register ExtReg
= RegInfo
.createVirtualRegister(&PPC::GPRCRegClass
);
10232 BuildMI(BB
, dl
, TII
->get(AtomicSize
== 1 ? PPC::EXTSB
: PPC::EXTSH
),
10233 ExtReg
).addReg(dest
);
10234 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10235 .addReg(incr
).addReg(ExtReg
);
10237 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10238 .addReg(incr
).addReg(dest
);
10240 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10241 .addImm(CmpPred
).addReg(PPC::CR0
).addMBB(exitMBB
);
10242 BB
->addSuccessor(loop2MBB
);
10243 BB
->addSuccessor(exitMBB
);
10246 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
10247 .addReg(TmpReg
).addReg(ptrA
).addReg(ptrB
);
10248 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10249 .addImm(PPC::PRED_NE
).addReg(PPC::CR0
).addMBB(loopMBB
);
10250 BB
->addSuccessor(loopMBB
);
10251 BB
->addSuccessor(exitMBB
);
10259 MachineBasicBlock
*PPCTargetLowering::EmitPartwordAtomicBinary(
10260 MachineInstr
&MI
, MachineBasicBlock
*BB
,
10261 bool is8bit
, // operation
10262 unsigned BinOpcode
, unsigned CmpOpcode
, unsigned CmpPred
) const {
10263 // If we support part-word atomic mnemonics, just use them
10264 if (Subtarget
.hasPartwordAtomics())
10265 return EmitAtomicBinary(MI
, BB
, is8bit
? 1 : 2, BinOpcode
, CmpOpcode
,
10268 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
10269 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10270 // In 64 bit mode we have to use 64 bits for addresses, even though the
10271 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
10272 // registers without caring whether they're 32 or 64, but here we're
10273 // doing actual arithmetic on the addresses.
10274 bool is64bit
= Subtarget
.isPPC64();
10275 bool isLittleEndian
= Subtarget
.isLittleEndian();
10276 unsigned ZeroReg
= is64bit
? PPC::ZERO8
: PPC::ZERO
;
10278 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10279 MachineFunction
*F
= BB
->getParent();
10280 MachineFunction::iterator It
= ++BB
->getIterator();
10282 Register dest
= MI
.getOperand(0).getReg();
10283 Register ptrA
= MI
.getOperand(1).getReg();
10284 Register ptrB
= MI
.getOperand(2).getReg();
10285 Register incr
= MI
.getOperand(3).getReg();
10286 DebugLoc dl
= MI
.getDebugLoc();
10288 MachineBasicBlock
*loopMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10289 MachineBasicBlock
*loop2MBB
=
10290 CmpOpcode
? F
->CreateMachineBasicBlock(LLVM_BB
) : nullptr;
10291 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10292 F
->insert(It
, loopMBB
);
10294 F
->insert(It
, loop2MBB
);
10295 F
->insert(It
, exitMBB
);
10296 exitMBB
->splice(exitMBB
->begin(), BB
,
10297 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10298 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10300 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10301 const TargetRegisterClass
*RC
=
10302 is64bit
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
10303 const TargetRegisterClass
*GPRC
= &PPC::GPRCRegClass
;
10305 Register PtrReg
= RegInfo
.createVirtualRegister(RC
);
10306 Register Shift1Reg
= RegInfo
.createVirtualRegister(GPRC
);
10307 Register ShiftReg
=
10308 isLittleEndian
? Shift1Reg
: RegInfo
.createVirtualRegister(GPRC
);
10309 Register Incr2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10310 Register MaskReg
= RegInfo
.createVirtualRegister(GPRC
);
10311 Register Mask2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10312 Register Mask3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10313 Register Tmp2Reg
= RegInfo
.createVirtualRegister(GPRC
);
10314 Register Tmp3Reg
= RegInfo
.createVirtualRegister(GPRC
);
10315 Register Tmp4Reg
= RegInfo
.createVirtualRegister(GPRC
);
10316 Register TmpDestReg
= RegInfo
.createVirtualRegister(GPRC
);
10319 (!BinOpcode
) ? Incr2Reg
: RegInfo
.createVirtualRegister(GPRC
);
10323 // fallthrough --> loopMBB
10324 BB
->addSuccessor(loopMBB
);
10326 // The 4-byte load must be aligned, while a char or short may be
10327 // anywhere in the word. Hence all this nasty bookkeeping code.
10328 // add ptr1, ptrA, ptrB [copy if ptrA==0]
10329 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
10330 // xori shift, shift1, 24 [16]
10331 // rlwinm ptr, ptr1, 0, 0, 29
10332 // slw incr2, incr, shift
10333 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
10334 // slw mask, mask2, shift
10336 // lwarx tmpDest, ptr
10337 // add tmp, tmpDest, incr2
10338 // andc tmp2, tmpDest, mask
10339 // and tmp3, tmp, mask
10340 // or tmp4, tmp3, tmp2
10341 // stwcx. tmp4, ptr
10343 // fallthrough --> exitMBB
10344 // srw dest, tmpDest, shift
10345 if (ptrA
!= ZeroReg
) {
10346 Ptr1Reg
= RegInfo
.createVirtualRegister(RC
);
10347 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::ADD8
: PPC::ADD4
), Ptr1Reg
)
10353 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
10355 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), Shift1Reg
)
10356 .addReg(Ptr1Reg
, 0, is64bit
? PPC::sub_32
: 0)
10359 .addImm(is8bit
? 28 : 27);
10360 if (!isLittleEndian
)
10361 BuildMI(BB
, dl
, TII
->get(PPC::XORI
), ShiftReg
)
10363 .addImm(is8bit
? 24 : 16);
10365 BuildMI(BB
, dl
, TII
->get(PPC::RLDICR
), PtrReg
)
10370 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), PtrReg
)
10375 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), Incr2Reg
).addReg(incr
).addReg(ShiftReg
);
10377 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask2Reg
).addImm(255);
10379 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask3Reg
).addImm(0);
10380 BuildMI(BB
, dl
, TII
->get(PPC::ORI
), Mask2Reg
)
10384 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), MaskReg
)
10389 BuildMI(BB
, dl
, TII
->get(PPC::LWARX
), TmpDestReg
)
10393 BuildMI(BB
, dl
, TII
->get(BinOpcode
), TmpReg
)
10395 .addReg(TmpDestReg
);
10396 BuildMI(BB
, dl
, TII
->get(PPC::ANDC
), Tmp2Reg
)
10397 .addReg(TmpDestReg
)
10399 BuildMI(BB
, dl
, TII
->get(PPC::AND
), Tmp3Reg
).addReg(TmpReg
).addReg(MaskReg
);
10401 // For unsigned comparisons, we can directly compare the shifted values.
10402 // For signed comparisons we shift and sign extend.
10403 Register SReg
= RegInfo
.createVirtualRegister(GPRC
);
10404 BuildMI(BB
, dl
, TII
->get(PPC::AND
), SReg
)
10405 .addReg(TmpDestReg
)
10407 unsigned ValueReg
= SReg
;
10408 unsigned CmpReg
= Incr2Reg
;
10409 if (CmpOpcode
== PPC::CMPW
) {
10410 ValueReg
= RegInfo
.createVirtualRegister(GPRC
);
10411 BuildMI(BB
, dl
, TII
->get(PPC::SRW
), ValueReg
)
10414 Register ValueSReg
= RegInfo
.createVirtualRegister(GPRC
);
10415 BuildMI(BB
, dl
, TII
->get(is8bit
? PPC::EXTSB
: PPC::EXTSH
), ValueSReg
)
10417 ValueReg
= ValueSReg
;
10420 BuildMI(BB
, dl
, TII
->get(CmpOpcode
), PPC::CR0
)
10423 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10427 BB
->addSuccessor(loop2MBB
);
10428 BB
->addSuccessor(exitMBB
);
10431 BuildMI(BB
, dl
, TII
->get(PPC::OR
), Tmp4Reg
).addReg(Tmp3Reg
).addReg(Tmp2Reg
);
10432 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
10436 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10437 .addImm(PPC::PRED_NE
)
10440 BB
->addSuccessor(loopMBB
);
10441 BB
->addSuccessor(exitMBB
);
10446 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::SRW
), dest
)
10447 .addReg(TmpDestReg
)
// Expands the SjLj setjmp pseudo instruction MI, located in MBB, into real
// machine code and control flow.
//
// Operands read from MI:
//   operand 0 - destination register (asserted to be legal for MVT::i32)
//   operand 1 - register holding the address of the jump buffer
//
// Two new blocks, mainMBB and sinkMBB, are created and inserted right after
// MBB. Everything that followed MI in MBB (and MBB's successor edges) is moved
// to sinkMBB. The result in the destination register is a PHI in sinkMBB of
// 0 (coming from mainMBB) and 1 (coming from thisMBB). MI is erased at the end.
//
// NOTE(review): this listing skips some source lines (the embedded numbering
// has gaps), so the operands of several BuildMI chains, the declaration of
// BaseReg, and the return statement are not visible here.
10452 llvm::MachineBasicBlock
*
10453 PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr
&MI
,
10454 MachineBasicBlock
*MBB
) const {
10455 DebugLoc DL
= MI
.getDebugLoc();
10456 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10457 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
10459 MachineFunction
*MF
= MBB
->getParent();
10460 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
10462 const BasicBlock
*BB
= MBB
->getBasicBlock();
// I is the position immediately after MBB; the new blocks are inserted there.
10463 MachineFunction::iterator I
= ++MBB
->getIterator();
10465 Register DstReg
= MI
.getOperand(0).getReg();
10466 const TargetRegisterClass
*RC
= MRI
.getRegClass(DstReg
);
10467 assert(TRI
->isTypeLegalForClass(*RC
, MVT::i32
) && "Invalid destination!");
// One fresh virtual register (same class as DstReg) per incoming edge of the
// final PHI: mainDstReg is defined in mainMBB, restoreDstReg in thisMBB.
10468 Register mainDstReg
= MRI
.createVirtualRegister(RC
);
10469 Register restoreDstReg
= MRI
.createVirtualRegister(RC
);
10471 MVT PVT
= getPointerTy(MF
->getDataLayout());
10472 assert((PVT
== MVT::i64
|| PVT
== MVT::i32
) &&
10473 "Invalid Pointer Size!");
10474 // For v = setjmp(buf), we generate
10477 // SjLjSetup mainMBB
10483 // buf[LabelOffset] = LR
10487 // v = phi(main, restore)
10490 MachineBasicBlock
*thisMBB
= MBB
;
10491 MachineBasicBlock
*mainMBB
= MF
->CreateMachineBasicBlock(BB
);
10492 MachineBasicBlock
*sinkMBB
= MF
->CreateMachineBasicBlock(BB
);
// Both insertions use the same iterator, so the layout order becomes
// thisMBB, mainMBB, sinkMBB.
10493 MF
->insert(I
, mainMBB
);
10494 MF
->insert(I
, sinkMBB
);
10496 MachineInstrBuilder MIB
;
10498 // Transfer the remainder of BB and its successor edges to sinkMBB.
10499 sinkMBB
->splice(sinkMBB
->begin(), MBB
,
10500 std::next(MachineBasicBlock::iterator(MI
)), MBB
->end());
10501 sinkMBB
->transferSuccessorsAndUpdatePHIs(MBB
);
10503 // Note that the structure of the jmp_buf used here is not compatible
10504 // with that used by libc, and is not designed to be. Specifically, it
10505 // stores only those 'reserved' registers that LLVM does not otherwise
10506 // understand how to spill. Also, by convention, by the time this
10507 // intrinsic is called, Clang has already stored the frame address in the
10508 // first slot of the buffer and stack address in the third. Following the
10509 // X86 target code, we'll store the jump address in the second slot. We also
10510 // need to save the TOC pointer (R2) to handle jumps between shared
10511 // libraries, and that will be stored in the fourth slot. The thread
10512 // identifier (R13) is not affected.
// Byte offsets into the buffer: slot indices 1, 3 and 4 scaled by the store
// size of the pointer type.
10515 const int64_t LabelOffset
= 1 * PVT
.getStoreSize();
10516 const int64_t TOCOffset
= 3 * PVT
.getStoreSize();
10517 const int64_t BPOffset
= 4 * PVT
.getStoreSize();
10519 // Prepare IP either in reg.
10520 const TargetRegisterClass
*PtrRC
= getRegClassFor(PVT
);
// LabelReg is pointer-sized; it receives the link register value in mainMBB.
10521 Register LabelReg
= MRI
.createVirtualRegister(PtrRC
);
10522 Register BufReg
= MI
.getOperand(1).getReg();
// On the 64-bit ELF ABI, note the TOC base pointer use for this function and
// emit an STD before MI. The STD's operands are not visible in this listing;
// presumably this is the TOC pointer save described above - TODO confirm.
10524 if (Subtarget
.is64BitELFABI()) {
10525 setUsesTOCBasePtr(*MBB
->getParent());
10526 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::STD
))
10533 // Naked functions never have a base pointer, and so we use r1. For all
10534 // other functions, this decision must be delayed until during PEI.
10536 if (MF
->getFunction().hasFnAttribute(Attribute::Naked
))
10537 BaseReg
= Subtarget
.isPPC64() ? PPC::X1
: PPC::R1
;
10539 BaseReg
= Subtarget
.isPPC64() ? PPC::BP8
: PPC::BP
;
// Store emitted before MI: STD on PPC64, STW otherwise. Operands are not
// visible here; presumably BaseReg is stored at BPOffset - TODO confirm.
10541 MIB
= BuildMI(*thisMBB
, MI
, DL
,
10542 TII
->get(Subtarget
.isPPC64() ? PPC::STD
: PPC::STW
))
// Branch-and-link to mainMBB, tagged with the "no preserved registers" mask
// obtained from TRI.
10549 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::BCLalways
)).addMBB(mainMBB
);
10550 MIB
.addRegMask(TRI
->getNoPreservedMask());
// restoreDstReg = 1: the value the PHI selects on the thisMBB -> sinkMBB edge.
10552 BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::LI
), restoreDstReg
).addImm(1);
// EH_SjLj_Setup pseudo inserted before MI (operands not visible here),
// followed by an unconditional branch to sinkMBB.
10554 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::EH_SjLj_Setup
))
10556 MIB
= BuildMI(*thisMBB
, MI
, DL
, TII
->get(PPC::B
)).addMBB(sinkMBB
);
// CFG edges out of thisMBB: mainMBB with probability zero, sinkMBB with
// probability one.
10558 thisMBB
->addSuccessor(mainMBB
, BranchProbability::getZero());
10559 thisMBB
->addSuccessor(sinkMBB
, BranchProbability::getOne());
// mainMBB: read the link register into LabelReg (MFLR8 on PPC64, else MFLR).
10564 BuildMI(mainMBB
, DL
,
10565 TII
->get(Subtarget
.isPPC64() ? PPC::MFLR8
: PPC::MFLR
), LabelReg
);
// Store at LabelOffset in the buffer (STD on PPC64, else STW); this matches
// the "buf[LabelOffset] = LR" step in the outline above. The memory operands
// of MI are cloned onto the store.
10568 if (Subtarget
.isPPC64()) {
10569 MIB
= BuildMI(mainMBB
, DL
, TII
->get(PPC::STD
))
10571 .addImm(LabelOffset
)
10574 MIB
= BuildMI(mainMBB
, DL
, TII
->get(PPC::STW
))
10576 .addImm(LabelOffset
)
10579 MIB
.cloneMemRefs(MI
);
// mainDstReg = 0: the value the PHI selects on the mainMBB -> sinkMBB edge.
10581 BuildMI(mainMBB
, DL
, TII
->get(PPC::LI
), mainDstReg
).addImm(0);
10582 mainMBB
->addSuccessor(sinkMBB
);
// sinkMBB: merge the two possible results into DstReg at the top of the block.
10585 BuildMI(*sinkMBB
, sinkMBB
->begin(), DL
,
10586 TII
->get(PPC::PHI
), DstReg
)
10587 .addReg(mainDstReg
).addMBB(mainMBB
)
10588 .addReg(restoreDstReg
).addMBB(thisMBB
);
// The pseudo has been fully expanded; remove it.
10590 MI
.eraseFromParent();
// Expands the SjLj longjmp pseudo instruction MI, located in MBB, into a
// straight-line sequence inserted before MI.
//
// Operand 0 of MI is the register holding the address of the jump buffer.
// The visible sequence is: loads into FP, Tmp, SP and BP (LD when the pointer
// type is i64, LWZ otherwise), an additional LD into X2 when the pointer type
// is i64 and the subtarget uses the SVR4 ABI, then a move of Tmp into CTR and
// a branch through CTR. MI is erased at the end.
//
// NOTE(review): this listing skips some source lines (the embedded numbering
// has gaps), so the start of the BP register selection, the operand chains of
// most loads, and the return statement are not visible here.
10594 MachineBasicBlock
*
10595 PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr
&MI
,
10596 MachineBasicBlock
*MBB
) const {
10597 DebugLoc DL
= MI
.getDebugLoc();
10598 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10600 MachineFunction
*MF
= MBB
->getParent();
10601 MachineRegisterInfo
&MRI
= MF
->getRegInfo();
10603 MVT PVT
= getPointerTy(MF
->getDataLayout());
10604 assert((PVT
== MVT::i64
|| PVT
== MVT::i32
) &&
10605 "Invalid Pointer Size!");
// Tmp is a pointer-sized virtual register; it receives the value loaded from
// LabelOffset and is later moved into CTR.
10607 const TargetRegisterClass
*RC
=
10608 (PVT
== MVT::i64
) ? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
10609 Register Tmp
= MRI
.createVirtualRegister(RC
);
10610 // Since FP is only updated here but NOT referenced, it's treated as GPR.
10611 unsigned FP
= (PVT
== MVT::i64
) ? PPC::X31
: PPC::R31
;
10612 unsigned SP
= (PVT
== MVT::i64
) ? PPC::X1
: PPC::R1
;
// NOTE(review): only this fragment of the BP register selection is visible;
// it picks PPC::R29 when the subtarget is SVR4 and the code is position
// independent. The other alternatives are not shown in this listing.
10616 : (Subtarget
.isSVR4ABI() && isPositionIndependent() ? PPC::R29
10619 MachineInstrBuilder MIB
;
// Byte offsets into the buffer: slot indices 1 through 4 scaled by the store
// size of the pointer type.
10621 const int64_t LabelOffset
= 1 * PVT
.getStoreSize();
10622 const int64_t SPOffset
= 2 * PVT
.getStoreSize();
10623 const int64_t TOCOffset
= 3 * PVT
.getStoreSize();
10624 const int64_t BPOffset
= 4 * PVT
.getStoreSize();
10626 Register BufReg
= MI
.getOperand(0).getReg();
10628 // Reload FP (the jumped-to function may not have had a
10629 // frame pointer, and if so, then its r31 will be restored
10631 if (PVT
== MVT::i64
) {
10632 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), FP
)
10636 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), FP
)
10640 MIB
.cloneMemRefs(MI
);
// Load the value stored at LabelOffset into Tmp.
10643 if (PVT
== MVT::i64
) {
10644 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), Tmp
)
10645 .addImm(LabelOffset
)
10648 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), Tmp
)
10649 .addImm(LabelOffset
)
10652 MIB
.cloneMemRefs(MI
);
// Load into SP. The offset operand is not visible here; presumably SPOffset -
// TODO confirm.
10655 if (PVT
== MVT::i64
) {
10656 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), SP
)
10660 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), SP
)
10664 MIB
.cloneMemRefs(MI
);
// Load into BP. The offset operand is not visible here; presumably BPOffset -
// TODO confirm.
10667 if (PVT
== MVT::i64
) {
10668 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), BP
)
10672 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LWZ
), BP
)
10676 MIB
.cloneMemRefs(MI
);
// 64-bit SVR4 only: note the TOC base pointer use for this function and load
// into X2. The offset operand is not visible here; presumably TOCOffset -
// TODO confirm.
10679 if (PVT
== MVT::i64
&& Subtarget
.isSVR4ABI()) {
10680 setUsesTOCBasePtr(*MBB
->getParent());
10681 MIB
= BuildMI(*MBB
, MI
, DL
, TII
->get(PPC::LD
), PPC::X2
)
// Move the loaded target address into CTR and branch through it
// (MTCTR8/BCTR8 for i64 pointers, MTCTR/BCTR otherwise).
10688 BuildMI(*MBB
, MI
, DL
,
10689 TII
->get(PVT
== MVT::i64
? PPC::MTCTR8
: PPC::MTCTR
)).addReg(Tmp
);
10690 BuildMI(*MBB
, MI
, DL
, TII
->get(PVT
== MVT::i64
? PPC::BCTR8
: PPC::BCTR
));
// The pseudo has been fully expanded; remove it.
10692 MI
.eraseFromParent();
10696 MachineBasicBlock
*
10697 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr
&MI
,
10698 MachineBasicBlock
*BB
) const {
10699 if (MI
.getOpcode() == TargetOpcode::STACKMAP
||
10700 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10701 if (Subtarget
.is64BitELFABI() &&
10702 MI
.getOpcode() == TargetOpcode::PATCHPOINT
) {
10703 // Call lowering should have added an r2 operand to indicate a dependence
10704 // on the TOC base pointer value. It can't however, because there is no
10705 // way to mark the dependence as implicit there, and so the stackmap code
10706 // will confuse it with a regular operand. Instead, add the dependence
10708 MI
.addOperand(MachineOperand::CreateReg(PPC::X2
, false, true));
10711 return emitPatchPoint(MI
, BB
);
10714 if (MI
.getOpcode() == PPC::EH_SjLj_SetJmp32
||
10715 MI
.getOpcode() == PPC::EH_SjLj_SetJmp64
) {
10716 return emitEHSjLjSetJmp(MI
, BB
);
10717 } else if (MI
.getOpcode() == PPC::EH_SjLj_LongJmp32
||
10718 MI
.getOpcode() == PPC::EH_SjLj_LongJmp64
) {
10719 return emitEHSjLjLongJmp(MI
, BB
);
10722 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
10724 // To "insert" these instructions we actually have to insert their
10725 // control-flow patterns.
10726 const BasicBlock
*LLVM_BB
= BB
->getBasicBlock();
10727 MachineFunction::iterator It
= ++BB
->getIterator();
10729 MachineFunction
*F
= BB
->getParent();
10731 if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10732 MI
.getOpcode() == PPC::SELECT_CC_I8
|| MI
.getOpcode() == PPC::SELECT_I4
||
10733 MI
.getOpcode() == PPC::SELECT_I8
) {
10734 SmallVector
<MachineOperand
, 2> Cond
;
10735 if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10736 MI
.getOpcode() == PPC::SELECT_CC_I8
)
10737 Cond
.push_back(MI
.getOperand(4));
10739 Cond
.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET
));
10740 Cond
.push_back(MI
.getOperand(1));
10742 DebugLoc dl
= MI
.getDebugLoc();
10743 TII
->insertSelect(*BB
, MI
, dl
, MI
.getOperand(0).getReg(), Cond
,
10744 MI
.getOperand(2).getReg(), MI
.getOperand(3).getReg());
10745 } else if (MI
.getOpcode() == PPC::SELECT_CC_I4
||
10746 MI
.getOpcode() == PPC::SELECT_CC_I8
||
10747 MI
.getOpcode() == PPC::SELECT_CC_F4
||
10748 MI
.getOpcode() == PPC::SELECT_CC_F8
||
10749 MI
.getOpcode() == PPC::SELECT_CC_F16
||
10750 MI
.getOpcode() == PPC::SELECT_CC_QFRC
||
10751 MI
.getOpcode() == PPC::SELECT_CC_QSRC
||
10752 MI
.getOpcode() == PPC::SELECT_CC_QBRC
||
10753 MI
.getOpcode() == PPC::SELECT_CC_VRRC
||
10754 MI
.getOpcode() == PPC::SELECT_CC_VSFRC
||
10755 MI
.getOpcode() == PPC::SELECT_CC_VSSRC
||
10756 MI
.getOpcode() == PPC::SELECT_CC_VSRC
||
10757 MI
.getOpcode() == PPC::SELECT_CC_SPE4
||
10758 MI
.getOpcode() == PPC::SELECT_CC_SPE
||
10759 MI
.getOpcode() == PPC::SELECT_I4
||
10760 MI
.getOpcode() == PPC::SELECT_I8
||
10761 MI
.getOpcode() == PPC::SELECT_F4
||
10762 MI
.getOpcode() == PPC::SELECT_F8
||
10763 MI
.getOpcode() == PPC::SELECT_F16
||
10764 MI
.getOpcode() == PPC::SELECT_QFRC
||
10765 MI
.getOpcode() == PPC::SELECT_QSRC
||
10766 MI
.getOpcode() == PPC::SELECT_QBRC
||
10767 MI
.getOpcode() == PPC::SELECT_SPE
||
10768 MI
.getOpcode() == PPC::SELECT_SPE4
||
10769 MI
.getOpcode() == PPC::SELECT_VRRC
||
10770 MI
.getOpcode() == PPC::SELECT_VSFRC
||
10771 MI
.getOpcode() == PPC::SELECT_VSSRC
||
10772 MI
.getOpcode() == PPC::SELECT_VSRC
) {
10773 // The incoming instruction knows the destination vreg to set, the
10774 // condition code register to branch on, the true/false values to
10775 // select between, and a branch opcode to use.
10780 // cmpTY ccX, r1, r2
10782 // fallthrough --> copy0MBB
10783 MachineBasicBlock
*thisMBB
= BB
;
10784 MachineBasicBlock
*copy0MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10785 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10786 DebugLoc dl
= MI
.getDebugLoc();
10787 F
->insert(It
, copy0MBB
);
10788 F
->insert(It
, sinkMBB
);
10790 // Transfer the remainder of BB and its successor edges to sinkMBB.
10791 sinkMBB
->splice(sinkMBB
->begin(), BB
,
10792 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10793 sinkMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10795 // Next, add the true and fallthrough blocks as its successors.
10796 BB
->addSuccessor(copy0MBB
);
10797 BB
->addSuccessor(sinkMBB
);
10799 if (MI
.getOpcode() == PPC::SELECT_I4
|| MI
.getOpcode() == PPC::SELECT_I8
||
10800 MI
.getOpcode() == PPC::SELECT_F4
|| MI
.getOpcode() == PPC::SELECT_F8
||
10801 MI
.getOpcode() == PPC::SELECT_F16
||
10802 MI
.getOpcode() == PPC::SELECT_SPE4
||
10803 MI
.getOpcode() == PPC::SELECT_SPE
||
10804 MI
.getOpcode() == PPC::SELECT_QFRC
||
10805 MI
.getOpcode() == PPC::SELECT_QSRC
||
10806 MI
.getOpcode() == PPC::SELECT_QBRC
||
10807 MI
.getOpcode() == PPC::SELECT_VRRC
||
10808 MI
.getOpcode() == PPC::SELECT_VSFRC
||
10809 MI
.getOpcode() == PPC::SELECT_VSSRC
||
10810 MI
.getOpcode() == PPC::SELECT_VSRC
) {
10811 BuildMI(BB
, dl
, TII
->get(PPC::BC
))
10812 .addReg(MI
.getOperand(1).getReg())
10815 unsigned SelectPred
= MI
.getOperand(4).getImm();
10816 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10817 .addImm(SelectPred
)
10818 .addReg(MI
.getOperand(1).getReg())
10823 // %FalseValue = ...
10824 // # fallthrough to sinkMBB
10827 // Update machine-CFG edges
10828 BB
->addSuccessor(sinkMBB
);
10831 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
10834 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::PHI
), MI
.getOperand(0).getReg())
10835 .addReg(MI
.getOperand(3).getReg())
10837 .addReg(MI
.getOperand(2).getReg())
10839 } else if (MI
.getOpcode() == PPC::ReadTB
) {
10840 // To read the 64-bit time-base register on a 32-bit target, we read the
10841 // two halves. Should the counter have wrapped while it was being read, we
10842 // need to try again.
10845 // mfspr Rx,TBU # load from TBU
10846 // mfspr Ry,TB # load from TB
10847 // mfspr Rz,TBU # load from TBU
10848 // cmpw crX,Rx,Rz # check if 'old'='new'
10849 // bne readLoop # branch if they're not equal
10852 MachineBasicBlock
*readMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10853 MachineBasicBlock
*sinkMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
10854 DebugLoc dl
= MI
.getDebugLoc();
10855 F
->insert(It
, readMBB
);
10856 F
->insert(It
, sinkMBB
);
10858 // Transfer the remainder of BB and its successor edges to sinkMBB.
10859 sinkMBB
->splice(sinkMBB
->begin(), BB
,
10860 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
10861 sinkMBB
->transferSuccessorsAndUpdatePHIs(BB
);
10863 BB
->addSuccessor(readMBB
);
10866 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
10867 Register ReadAgainReg
= RegInfo
.createVirtualRegister(&PPC::GPRCRegClass
);
10868 Register LoReg
= MI
.getOperand(0).getReg();
10869 Register HiReg
= MI
.getOperand(1).getReg();
10871 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), HiReg
).addImm(269);
10872 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), LoReg
).addImm(268);
10873 BuildMI(BB
, dl
, TII
->get(PPC::MFSPR
), ReadAgainReg
).addImm(269);
10875 Register CmpReg
= RegInfo
.createVirtualRegister(&PPC::CRRCRegClass
);
10877 BuildMI(BB
, dl
, TII
->get(PPC::CMPW
), CmpReg
)
10879 .addReg(ReadAgainReg
);
10880 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
10881 .addImm(PPC::PRED_NE
)
10885 BB
->addSuccessor(readMBB
);
10886 BB
->addSuccessor(sinkMBB
);
10887 } else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8
)
10888 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::ADD4
);
10889 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16
)
10890 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::ADD4
);
10891 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32
)
10892 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::ADD4
);
10893 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64
)
10894 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::ADD8
);
10896 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I8
)
10897 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::AND
);
10898 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I16
)
10899 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::AND
);
10900 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I32
)
10901 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::AND
);
10902 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_AND_I64
)
10903 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::AND8
);
10905 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I8
)
10906 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::OR
);
10907 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I16
)
10908 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::OR
);
10909 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I32
)
10910 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::OR
);
10911 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_OR_I64
)
10912 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::OR8
);
10914 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8
)
10915 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::XOR
);
10916 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16
)
10917 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::XOR
);
10918 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32
)
10919 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::XOR
);
10920 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64
)
10921 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::XOR8
);
10923 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8
)
10924 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::NAND
);
10925 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16
)
10926 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::NAND
);
10927 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32
)
10928 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::NAND
);
10929 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64
)
10930 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::NAND8
);
10932 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8
)
10933 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, PPC::SUBF
);
10934 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16
)
10935 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, PPC::SUBF
);
10936 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32
)
10937 BB
= EmitAtomicBinary(MI
, BB
, 4, PPC::SUBF
);
10938 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64
)
10939 BB
= EmitAtomicBinary(MI
, BB
, 8, PPC::SUBF8
);
10941 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8
)
10942 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPW
, PPC::PRED_GE
);
10943 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16
)
10944 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPW
, PPC::PRED_GE
);
10945 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32
)
10946 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPW
, PPC::PRED_GE
);
10947 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64
)
10948 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPD
, PPC::PRED_GE
);
10950 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8
)
10951 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPW
, PPC::PRED_LE
);
10952 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16
)
10953 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPW
, PPC::PRED_LE
);
10954 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32
)
10955 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPW
, PPC::PRED_LE
);
10956 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64
)
10957 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPD
, PPC::PRED_LE
);
10959 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8
)
10960 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPLW
, PPC::PRED_GE
);
10961 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16
)
10962 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPLW
, PPC::PRED_GE
);
10963 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32
)
10964 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPLW
, PPC::PRED_GE
);
10965 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64
)
10966 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPLD
, PPC::PRED_GE
);
10968 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8
)
10969 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0, PPC::CMPLW
, PPC::PRED_LE
);
10970 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16
)
10971 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0, PPC::CMPLW
, PPC::PRED_LE
);
10972 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32
)
10973 BB
= EmitAtomicBinary(MI
, BB
, 4, 0, PPC::CMPLW
, PPC::PRED_LE
);
10974 else if (MI
.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64
)
10975 BB
= EmitAtomicBinary(MI
, BB
, 8, 0, PPC::CMPLD
, PPC::PRED_LE
);
10977 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I8
)
10978 BB
= EmitPartwordAtomicBinary(MI
, BB
, true, 0);
10979 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I16
)
10980 BB
= EmitPartwordAtomicBinary(MI
, BB
, false, 0);
10981 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I32
)
10982 BB
= EmitAtomicBinary(MI
, BB
, 4, 0);
10983 else if (MI
.getOpcode() == PPC::ATOMIC_SWAP_I64
)
10984 BB
= EmitAtomicBinary(MI
, BB
, 8, 0);
10985 else if (MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32
||
10986 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64
||
10987 (Subtarget
.hasPartwordAtomics() &&
10988 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
) ||
10989 (Subtarget
.hasPartwordAtomics() &&
10990 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16
)) {
10991 bool is64bit
= MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64
;
10993 auto LoadMnemonic
= PPC::LDARX
;
10994 auto StoreMnemonic
= PPC::STDCX
;
10995 switch (MI
.getOpcode()) {
10997 llvm_unreachable("Compare and swap of unknown size");
10998 case PPC::ATOMIC_CMP_SWAP_I8
:
10999 LoadMnemonic
= PPC::LBARX
;
11000 StoreMnemonic
= PPC::STBCX
;
11001 assert(Subtarget
.hasPartwordAtomics() && "No support partword atomics.");
11003 case PPC::ATOMIC_CMP_SWAP_I16
:
11004 LoadMnemonic
= PPC::LHARX
;
11005 StoreMnemonic
= PPC::STHCX
;
11006 assert(Subtarget
.hasPartwordAtomics() && "No support partword atomics.");
11008 case PPC::ATOMIC_CMP_SWAP_I32
:
11009 LoadMnemonic
= PPC::LWARX
;
11010 StoreMnemonic
= PPC::STWCX
;
11012 case PPC::ATOMIC_CMP_SWAP_I64
:
11013 LoadMnemonic
= PPC::LDARX
;
11014 StoreMnemonic
= PPC::STDCX
;
11017 Register dest
= MI
.getOperand(0).getReg();
11018 Register ptrA
= MI
.getOperand(1).getReg();
11019 Register ptrB
= MI
.getOperand(2).getReg();
11020 Register oldval
= MI
.getOperand(3).getReg();
11021 Register newval
= MI
.getOperand(4).getReg();
11022 DebugLoc dl
= MI
.getDebugLoc();
11024 MachineBasicBlock
*loop1MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11025 MachineBasicBlock
*loop2MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11026 MachineBasicBlock
*midMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11027 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11028 F
->insert(It
, loop1MBB
);
11029 F
->insert(It
, loop2MBB
);
11030 F
->insert(It
, midMBB
);
11031 F
->insert(It
, exitMBB
);
11032 exitMBB
->splice(exitMBB
->begin(), BB
,
11033 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
11034 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
11038 // fallthrough --> loopMBB
11039 BB
->addSuccessor(loop1MBB
);
11042 // l[bhwd]arx dest, ptr
11043 // cmp[wd] dest, oldval
11046 // st[bhwd]cx. newval, ptr
11050 // st[bhwd]cx. dest, ptr
11053 BuildMI(BB
, dl
, TII
->get(LoadMnemonic
), dest
).addReg(ptrA
).addReg(ptrB
);
11054 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::CMPD
: PPC::CMPW
), PPC::CR0
)
11057 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11058 .addImm(PPC::PRED_NE
)
11061 BB
->addSuccessor(loop2MBB
);
11062 BB
->addSuccessor(midMBB
);
11065 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
11069 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11070 .addImm(PPC::PRED_NE
)
11073 BuildMI(BB
, dl
, TII
->get(PPC::B
)).addMBB(exitMBB
);
11074 BB
->addSuccessor(loop1MBB
);
11075 BB
->addSuccessor(exitMBB
);
11078 BuildMI(BB
, dl
, TII
->get(StoreMnemonic
))
11082 BB
->addSuccessor(exitMBB
);
11087 } else if (MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
||
11088 MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16
) {
11089 // We must use 64-bit registers for addresses when targeting 64-bit,
11090 // since we're actually doing arithmetic on them. Other registers
11092 bool is64bit
= Subtarget
.isPPC64();
11093 bool isLittleEndian
= Subtarget
.isLittleEndian();
11094 bool is8bit
= MI
.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8
;
11096 Register dest
= MI
.getOperand(0).getReg();
11097 Register ptrA
= MI
.getOperand(1).getReg();
11098 Register ptrB
= MI
.getOperand(2).getReg();
11099 Register oldval
= MI
.getOperand(3).getReg();
11100 Register newval
= MI
.getOperand(4).getReg();
11101 DebugLoc dl
= MI
.getDebugLoc();
11103 MachineBasicBlock
*loop1MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11104 MachineBasicBlock
*loop2MBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11105 MachineBasicBlock
*midMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11106 MachineBasicBlock
*exitMBB
= F
->CreateMachineBasicBlock(LLVM_BB
);
11107 F
->insert(It
, loop1MBB
);
11108 F
->insert(It
, loop2MBB
);
11109 F
->insert(It
, midMBB
);
11110 F
->insert(It
, exitMBB
);
11111 exitMBB
->splice(exitMBB
->begin(), BB
,
11112 std::next(MachineBasicBlock::iterator(MI
)), BB
->end());
11113 exitMBB
->transferSuccessorsAndUpdatePHIs(BB
);
11115 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11116 const TargetRegisterClass
*RC
=
11117 is64bit
? &PPC::G8RCRegClass
: &PPC::GPRCRegClass
;
11118 const TargetRegisterClass
*GPRC
= &PPC::GPRCRegClass
;
11120 Register PtrReg
= RegInfo
.createVirtualRegister(RC
);
11121 Register Shift1Reg
= RegInfo
.createVirtualRegister(GPRC
);
11122 Register ShiftReg
=
11123 isLittleEndian
? Shift1Reg
: RegInfo
.createVirtualRegister(GPRC
);
11124 Register NewVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11125 Register NewVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11126 Register OldVal2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11127 Register OldVal3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11128 Register MaskReg
= RegInfo
.createVirtualRegister(GPRC
);
11129 Register Mask2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11130 Register Mask3Reg
= RegInfo
.createVirtualRegister(GPRC
);
11131 Register Tmp2Reg
= RegInfo
.createVirtualRegister(GPRC
);
11132 Register Tmp4Reg
= RegInfo
.createVirtualRegister(GPRC
);
11133 Register TmpDestReg
= RegInfo
.createVirtualRegister(GPRC
);
11135 Register TmpReg
= RegInfo
.createVirtualRegister(GPRC
);
11136 Register ZeroReg
= is64bit
? PPC::ZERO8
: PPC::ZERO
;
11139 // fallthrough --> loopMBB
11140 BB
->addSuccessor(loop1MBB
);
11142 // The 4-byte load must be aligned, while a char or short may be
11143 // anywhere in the word. Hence all this nasty bookkeeping code.
11144 // add ptr1, ptrA, ptrB [copy if ptrA==0]
11145 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11146 // xori shift, shift1, 24 [16]
11147 // rlwinm ptr, ptr1, 0, 0, 29
11148 // slw newval2, newval, shift
11149 // slw oldval2, oldval,shift
11150 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11151 // slw mask, mask2, shift
11152 // and newval3, newval2, mask
11153 // and oldval3, oldval2, mask
11155 // lwarx tmpDest, ptr
11156 // and tmp, tmpDest, mask
11157 // cmpw tmp, oldval3
11160 // andc tmp2, tmpDest, mask
11161 // or tmp4, tmp2, newval3
11162 // stwcx. tmp4, ptr
11166 // stwcx. tmpDest, ptr
11168 // srw dest, tmpDest, shift
11169 if (ptrA
!= ZeroReg
) {
11170 Ptr1Reg
= RegInfo
.createVirtualRegister(RC
);
11171 BuildMI(BB
, dl
, TII
->get(is64bit
? PPC::ADD8
: PPC::ADD4
), Ptr1Reg
)
11178 // We need use 32-bit subregister to avoid mismatch register class in 64-bit
11180 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), Shift1Reg
)
11181 .addReg(Ptr1Reg
, 0, is64bit
? PPC::sub_32
: 0)
11184 .addImm(is8bit
? 28 : 27);
11185 if (!isLittleEndian
)
11186 BuildMI(BB
, dl
, TII
->get(PPC::XORI
), ShiftReg
)
11188 .addImm(is8bit
? 24 : 16);
11190 BuildMI(BB
, dl
, TII
->get(PPC::RLDICR
), PtrReg
)
11195 BuildMI(BB
, dl
, TII
->get(PPC::RLWINM
), PtrReg
)
11200 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), NewVal2Reg
)
11203 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), OldVal2Reg
)
11207 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask2Reg
).addImm(255);
11209 BuildMI(BB
, dl
, TII
->get(PPC::LI
), Mask3Reg
).addImm(0);
11210 BuildMI(BB
, dl
, TII
->get(PPC::ORI
), Mask2Reg
)
11214 BuildMI(BB
, dl
, TII
->get(PPC::SLW
), MaskReg
)
11217 BuildMI(BB
, dl
, TII
->get(PPC::AND
), NewVal3Reg
)
11218 .addReg(NewVal2Reg
)
11220 BuildMI(BB
, dl
, TII
->get(PPC::AND
), OldVal3Reg
)
11221 .addReg(OldVal2Reg
)
11225 BuildMI(BB
, dl
, TII
->get(PPC::LWARX
), TmpDestReg
)
11228 BuildMI(BB
, dl
, TII
->get(PPC::AND
), TmpReg
)
11229 .addReg(TmpDestReg
)
11231 BuildMI(BB
, dl
, TII
->get(PPC::CMPW
), PPC::CR0
)
11233 .addReg(OldVal3Reg
);
11234 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11235 .addImm(PPC::PRED_NE
)
11238 BB
->addSuccessor(loop2MBB
);
11239 BB
->addSuccessor(midMBB
);
11242 BuildMI(BB
, dl
, TII
->get(PPC::ANDC
), Tmp2Reg
)
11243 .addReg(TmpDestReg
)
11245 BuildMI(BB
, dl
, TII
->get(PPC::OR
), Tmp4Reg
)
11247 .addReg(NewVal3Reg
);
11248 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
11252 BuildMI(BB
, dl
, TII
->get(PPC::BCC
))
11253 .addImm(PPC::PRED_NE
)
11256 BuildMI(BB
, dl
, TII
->get(PPC::B
)).addMBB(exitMBB
);
11257 BB
->addSuccessor(loop1MBB
);
11258 BB
->addSuccessor(exitMBB
);
11261 BuildMI(BB
, dl
, TII
->get(PPC::STWCX
))
11262 .addReg(TmpDestReg
)
11265 BB
->addSuccessor(exitMBB
);
11270 BuildMI(*BB
, BB
->begin(), dl
, TII
->get(PPC::SRW
), dest
)
11273 } else if (MI
.getOpcode() == PPC::FADDrtz
) {
11274 // This pseudo performs an FADD with rounding mode temporarily forced
11275 // to round-to-zero. We emit this via custom inserter since the FPSCR
11276 // is not modeled at the SelectionDAG level.
11277 Register Dest
= MI
.getOperand(0).getReg();
11278 Register Src1
= MI
.getOperand(1).getReg();
11279 Register Src2
= MI
.getOperand(2).getReg();
11280 DebugLoc dl
= MI
.getDebugLoc();
11282 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11283 Register MFFSReg
= RegInfo
.createVirtualRegister(&PPC::F8RCRegClass
);
11285 // Save FPSCR value.
11286 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), MFFSReg
);
11288 // Set rounding mode to round-to-zero.
11289 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSB1
)).addImm(31);
11290 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSB0
)).addImm(30);
11292 // Perform addition.
11293 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::FADD
), Dest
).addReg(Src1
).addReg(Src2
);
11295 // Restore FPSCR value.
11296 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSFb
)).addImm(1).addReg(MFFSReg
);
11297 } else if (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT
||
11298 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT
||
11299 MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
||
11300 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT8
) {
11301 unsigned Opcode
= (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
||
11302 MI
.getOpcode() == PPC::ANDIo_1_GT_BIT8
)
11305 bool isEQ
= (MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT
||
11306 MI
.getOpcode() == PPC::ANDIo_1_EQ_BIT8
);
11308 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11309 Register Dest
= RegInfo
.createVirtualRegister(
11310 Opcode
== PPC::ANDIo
? &PPC::GPRCRegClass
: &PPC::G8RCRegClass
);
11312 DebugLoc dl
= MI
.getDebugLoc();
11313 BuildMI(*BB
, MI
, dl
, TII
->get(Opcode
), Dest
)
11314 .addReg(MI
.getOperand(1).getReg())
11316 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::COPY
),
11317 MI
.getOperand(0).getReg())
11318 .addReg(isEQ
? PPC::CR0EQ
: PPC::CR0GT
);
11319 } else if (MI
.getOpcode() == PPC::TCHECK_RET
) {
11320 DebugLoc Dl
= MI
.getDebugLoc();
11321 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11322 Register CRReg
= RegInfo
.createVirtualRegister(&PPC::CRRCRegClass
);
11323 BuildMI(*BB
, MI
, Dl
, TII
->get(PPC::TCHECK
), CRReg
);
11324 BuildMI(*BB
, MI
, Dl
, TII
->get(TargetOpcode::COPY
),
11325 MI
.getOperand(0).getReg())
11327 } else if (MI
.getOpcode() == PPC::TBEGIN_RET
) {
11328 DebugLoc Dl
= MI
.getDebugLoc();
11329 unsigned Imm
= MI
.getOperand(1).getImm();
11330 BuildMI(*BB
, MI
, Dl
, TII
->get(PPC::TBEGIN
)).addImm(Imm
);
11331 BuildMI(*BB
, MI
, Dl
, TII
->get(TargetOpcode::COPY
),
11332 MI
.getOperand(0).getReg())
11333 .addReg(PPC::CR0EQ
);
11334 } else if (MI
.getOpcode() == PPC::SETRNDi
) {
11335 DebugLoc dl
= MI
.getDebugLoc();
11336 Register OldFPSCRReg
= MI
.getOperand(0).getReg();
11338 // Save FPSCR value.
11339 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), OldFPSCRReg
);
11341 // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
11342 // the following settings:
11343 // 00 Round to nearest
11345 // 10 Round to +inf
11346 // 11 Round to -inf
11348 // When the operand is immediate, using the two least significant bits of
11349 // the immediate to set the bits 62:63 of FPSCR.
11350 unsigned Mode
= MI
.getOperand(1).getImm();
11351 BuildMI(*BB
, MI
, dl
, TII
->get((Mode
& 1) ? PPC::MTFSB1
: PPC::MTFSB0
))
11354 BuildMI(*BB
, MI
, dl
, TII
->get((Mode
& 2) ? PPC::MTFSB1
: PPC::MTFSB0
))
11356 } else if (MI
.getOpcode() == PPC::SETRND
) {
11357 DebugLoc dl
= MI
.getDebugLoc();
11359 // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
11360 // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
11361 // If the target doesn't have DirectMove, we should use stack to do the
11362 // conversion, because the target doesn't have the instructions like mtvsrd
11363 // or mfvsrd to do this conversion directly.
11364 auto copyRegFromG8RCOrF8RC
= [&] (unsigned DestReg
, unsigned SrcReg
) {
11365 if (Subtarget
.hasDirectMove()) {
11366 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::COPY
), DestReg
)
11369 // Use stack to do the register copy.
11370 unsigned StoreOp
= PPC::STD
, LoadOp
= PPC::LFD
;
11371 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11372 const TargetRegisterClass
*RC
= RegInfo
.getRegClass(SrcReg
);
11373 if (RC
== &PPC::F8RCRegClass
) {
11374 // Copy register from F8RCRegClass to G8RCRegclass.
11375 assert((RegInfo
.getRegClass(DestReg
) == &PPC::G8RCRegClass
) &&
11376 "Unsupported RegClass.");
11378 StoreOp
= PPC::STFD
;
11381 // Copy register from G8RCRegClass to F8RCRegclass.
11382 assert((RegInfo
.getRegClass(SrcReg
) == &PPC::G8RCRegClass
) &&
11383 (RegInfo
.getRegClass(DestReg
) == &PPC::F8RCRegClass
) &&
11384 "Unsupported RegClass.");
11387 MachineFrameInfo
&MFI
= F
->getFrameInfo();
11388 int FrameIdx
= MFI
.CreateStackObject(8, 8, false);
11390 MachineMemOperand
*MMOStore
= F
->getMachineMemOperand(
11391 MachinePointerInfo::getFixedStack(*F
, FrameIdx
, 0),
11392 MachineMemOperand::MOStore
, MFI
.getObjectSize(FrameIdx
),
11393 MFI
.getObjectAlignment(FrameIdx
));
11395 // Store the SrcReg into the stack.
11396 BuildMI(*BB
, MI
, dl
, TII
->get(StoreOp
))
11399 .addFrameIndex(FrameIdx
)
11400 .addMemOperand(MMOStore
);
11402 MachineMemOperand
*MMOLoad
= F
->getMachineMemOperand(
11403 MachinePointerInfo::getFixedStack(*F
, FrameIdx
, 0),
11404 MachineMemOperand::MOLoad
, MFI
.getObjectSize(FrameIdx
),
11405 MFI
.getObjectAlignment(FrameIdx
));
11407 // Load from the stack where SrcReg is stored, and save to DestReg,
11408 // so we have done the RegClass conversion from RegClass::SrcReg to
11409 // RegClass::DestReg.
11410 BuildMI(*BB
, MI
, dl
, TII
->get(LoadOp
), DestReg
)
11412 .addFrameIndex(FrameIdx
)
11413 .addMemOperand(MMOLoad
);
11417 Register OldFPSCRReg
= MI
.getOperand(0).getReg();
11419 // Save FPSCR value.
11420 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MFFS
), OldFPSCRReg
);
11422 // When the operand is gprc register, use two least significant bits of the
11423 // register and mtfsf instruction to set the bits 62:63 of FPSCR.
11425 // copy OldFPSCRTmpReg, OldFPSCRReg
11426 // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
11427 // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
11428 // copy NewFPSCRReg, NewFPSCRTmpReg
11429 // mtfsf 255, NewFPSCRReg
11430 MachineOperand SrcOp
= MI
.getOperand(1);
11431 MachineRegisterInfo
&RegInfo
= F
->getRegInfo();
11432 Register OldFPSCRTmpReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11434 copyRegFromG8RCOrF8RC(OldFPSCRTmpReg
, OldFPSCRReg
);
11436 Register ImDefReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11437 Register ExtSrcReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11439 // The first operand of INSERT_SUBREG should be a register which has
11440 // subregisters, we only care about its RegClass, so we should use an
11441 // IMPLICIT_DEF register.
11442 BuildMI(*BB
, MI
, dl
, TII
->get(TargetOpcode::IMPLICIT_DEF
), ImDefReg
);
11443 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::INSERT_SUBREG
), ExtSrcReg
)
11448 Register NewFPSCRTmpReg
= RegInfo
.createVirtualRegister(&PPC::G8RCRegClass
);
11449 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::RLDIMI
), NewFPSCRTmpReg
)
11450 .addReg(OldFPSCRTmpReg
)
11455 Register NewFPSCRReg
= RegInfo
.createVirtualRegister(&PPC::F8RCRegClass
);
11456 copyRegFromG8RCOrF8RC(NewFPSCRReg
, NewFPSCRTmpReg
);
11458 // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
11460 BuildMI(*BB
, MI
, dl
, TII
->get(PPC::MTFSF
))
11462 .addReg(NewFPSCRReg
)
11466 llvm_unreachable("Unexpected instr type to insert");
11469 MI
.eraseFromParent(); // The pseudo instruction is gone now.
11473 //===----------------------------------------------------------------------===//
11474 // Target Optimization Hooks
11475 //===----------------------------------------------------------------------===//
11477 static int getEstimateRefinementSteps(EVT VT
, const PPCSubtarget
&Subtarget
) {
11478 // For the estimates, convergence is quadratic, so we essentially double the
11479 // number of digits correct after every iteration. For both FRE and FRSQRTE,
11480 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
11481 // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
11482 int RefinementSteps
= Subtarget
.hasRecipPrec() ? 1 : 3;
11483 if (VT
.getScalarType() == MVT::f64
)
11485 return RefinementSteps
;
11488 SDValue
PPCTargetLowering::getSqrtEstimate(SDValue Operand
, SelectionDAG
&DAG
,
11489 int Enabled
, int &RefinementSteps
,
11490 bool &UseOneConstNR
,
11491 bool Reciprocal
) const {
11492 EVT VT
= Operand
.getValueType();
11493 if ((VT
== MVT::f32
&& Subtarget
.hasFRSQRTES()) ||
11494 (VT
== MVT::f64
&& Subtarget
.hasFRSQRTE()) ||
11495 (VT
== MVT::v4f32
&& Subtarget
.hasAltivec()) ||
11496 (VT
== MVT::v2f64
&& Subtarget
.hasVSX()) ||
11497 (VT
== MVT::v4f32
&& Subtarget
.hasQPX()) ||
11498 (VT
== MVT::v4f64
&& Subtarget
.hasQPX())) {
11499 if (RefinementSteps
== ReciprocalEstimate::Unspecified
)
11500 RefinementSteps
= getEstimateRefinementSteps(VT
, Subtarget
);
11502 // The Newton-Raphson computation with a single constant does not provide
11503 // enough accuracy on some CPUs.
11504 UseOneConstNR
= !Subtarget
.needsTwoConstNR();
11505 return DAG
.getNode(PPCISD::FRSQRTE
, SDLoc(Operand
), VT
, Operand
);
11510 SDValue
PPCTargetLowering::getRecipEstimate(SDValue Operand
, SelectionDAG
&DAG
,
11512 int &RefinementSteps
) const {
11513 EVT VT
= Operand
.getValueType();
11514 if ((VT
== MVT::f32
&& Subtarget
.hasFRES()) ||
11515 (VT
== MVT::f64
&& Subtarget
.hasFRE()) ||
11516 (VT
== MVT::v4f32
&& Subtarget
.hasAltivec()) ||
11517 (VT
== MVT::v2f64
&& Subtarget
.hasVSX()) ||
11518 (VT
== MVT::v4f32
&& Subtarget
.hasQPX()) ||
11519 (VT
== MVT::v4f64
&& Subtarget
.hasQPX())) {
11520 if (RefinementSteps
== ReciprocalEstimate::Unspecified
)
11521 RefinementSteps
= getEstimateRefinementSteps(VT
, Subtarget
);
11522 return DAG
.getNode(PPCISD::FRE
, SDLoc(Operand
), VT
, Operand
);
11527 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
11528 // Note: This functionality is used only when unsafe-fp-math is enabled, and
11529 // on cores with reciprocal estimates (which are used when unsafe-fp-math is
11530 // enabled for division), this functionality is redundant with the default
11531 // combiner logic (once the division -> reciprocal/multiply transformation
11532 // has taken place). As a result, this matters more for older cores than for
11535 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
11536 // reciprocal if there are two or more FDIVs (for embedded cores with only
11537 // one FP pipeline) for three or more FDIVs (for generic OOO cores).
11538 switch (Subtarget
.getDarwinDirective()) {
11543 case PPC::DIR_E500
:
11544 case PPC::DIR_E500mc
:
11545 case PPC::DIR_E5500
:
11550 // isConsecutiveLSLoc needs to work even if all adds have not yet been
11551 // collapsed, and so we need to look through chains of them.
11552 static void getBaseWithConstantOffset(SDValue Loc
, SDValue
&Base
,
11553 int64_t& Offset
, SelectionDAG
&DAG
) {
11554 if (DAG
.isBaseWithConstantOffset(Loc
)) {
11555 Base
= Loc
.getOperand(0);
11556 Offset
+= cast
<ConstantSDNode
>(Loc
.getOperand(1))->getSExtValue();
11558 // The base might itself be a base plus an offset, and if so, accumulate
11560 getBaseWithConstantOffset(Loc
.getOperand(0), Base
, Offset
, DAG
);
11564 static bool isConsecutiveLSLoc(SDValue Loc
, EVT VT
, LSBaseSDNode
*Base
,
11565 unsigned Bytes
, int Dist
,
11566 SelectionDAG
&DAG
) {
11567 if (VT
.getSizeInBits() / 8 != Bytes
)
11570 SDValue BaseLoc
= Base
->getBasePtr();
11571 if (Loc
.getOpcode() == ISD::FrameIndex
) {
11572 if (BaseLoc
.getOpcode() != ISD::FrameIndex
)
11574 const MachineFrameInfo
&MFI
= DAG
.getMachineFunction().getFrameInfo();
11575 int FI
= cast
<FrameIndexSDNode
>(Loc
)->getIndex();
11576 int BFI
= cast
<FrameIndexSDNode
>(BaseLoc
)->getIndex();
11577 int FS
= MFI
.getObjectSize(FI
);
11578 int BFS
= MFI
.getObjectSize(BFI
);
11579 if (FS
!= BFS
|| FS
!= (int)Bytes
) return false;
11580 return MFI
.getObjectOffset(FI
) == (MFI
.getObjectOffset(BFI
) + Dist
*Bytes
);
11583 SDValue Base1
= Loc
, Base2
= BaseLoc
;
11584 int64_t Offset1
= 0, Offset2
= 0;
11585 getBaseWithConstantOffset(Loc
, Base1
, Offset1
, DAG
);
11586 getBaseWithConstantOffset(BaseLoc
, Base2
, Offset2
, DAG
);
11587 if (Base1
== Base2
&& Offset1
== (Offset2
+ Dist
* Bytes
))
11590 const TargetLowering
&TLI
= DAG
.getTargetLoweringInfo();
11591 const GlobalValue
*GV1
= nullptr;
11592 const GlobalValue
*GV2
= nullptr;
11595 bool isGA1
= TLI
.isGAPlusOffset(Loc
.getNode(), GV1
, Offset1
);
11596 bool isGA2
= TLI
.isGAPlusOffset(BaseLoc
.getNode(), GV2
, Offset2
);
11597 if (isGA1
&& isGA2
&& GV1
== GV2
)
11598 return Offset1
== (Offset2
+ Dist
*Bytes
);
11602 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
11603 // not enforce equality of the chain operands.
11604 static bool isConsecutiveLS(SDNode
*N
, LSBaseSDNode
*Base
,
11605 unsigned Bytes
, int Dist
,
11606 SelectionDAG
&DAG
) {
11607 if (LSBaseSDNode
*LS
= dyn_cast
<LSBaseSDNode
>(N
)) {
11608 EVT VT
= LS
->getMemoryVT();
11609 SDValue Loc
= LS
->getBasePtr();
11610 return isConsecutiveLSLoc(Loc
, VT
, Base
, Bytes
, Dist
, DAG
);
11613 if (N
->getOpcode() == ISD::INTRINSIC_W_CHAIN
) {
11615 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
11616 default: return false;
11617 case Intrinsic::ppc_qpx_qvlfd
:
11618 case Intrinsic::ppc_qpx_qvlfda
:
11621 case Intrinsic::ppc_qpx_qvlfs
:
11622 case Intrinsic::ppc_qpx_qvlfsa
:
11625 case Intrinsic::ppc_qpx_qvlfcd
:
11626 case Intrinsic::ppc_qpx_qvlfcda
:
11629 case Intrinsic::ppc_qpx_qvlfcs
:
11630 case Intrinsic::ppc_qpx_qvlfcsa
:
11633 case Intrinsic::ppc_qpx_qvlfiwa
:
11634 case Intrinsic::ppc_qpx_qvlfiwz
:
11635 case Intrinsic::ppc_altivec_lvx
:
11636 case Intrinsic::ppc_altivec_lvxl
:
11637 case Intrinsic::ppc_vsx_lxvw4x
:
11638 case Intrinsic::ppc_vsx_lxvw4x_be
:
11641 case Intrinsic::ppc_vsx_lxvd2x
:
11642 case Intrinsic::ppc_vsx_lxvd2x_be
:
11645 case Intrinsic::ppc_altivec_lvebx
:
11648 case Intrinsic::ppc_altivec_lvehx
:
11651 case Intrinsic::ppc_altivec_lvewx
:
11656 return isConsecutiveLSLoc(N
->getOperand(2), VT
, Base
, Bytes
, Dist
, DAG
);
11659 if (N
->getOpcode() == ISD::INTRINSIC_VOID
) {
11661 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
11662 default: return false;
11663 case Intrinsic::ppc_qpx_qvstfd
:
11664 case Intrinsic::ppc_qpx_qvstfda
:
11667 case Intrinsic::ppc_qpx_qvstfs
:
11668 case Intrinsic::ppc_qpx_qvstfsa
:
11671 case Intrinsic::ppc_qpx_qvstfcd
:
11672 case Intrinsic::ppc_qpx_qvstfcda
:
11675 case Intrinsic::ppc_qpx_qvstfcs
:
11676 case Intrinsic::ppc_qpx_qvstfcsa
:
11679 case Intrinsic::ppc_qpx_qvstfiw
:
11680 case Intrinsic::ppc_qpx_qvstfiwa
:
11681 case Intrinsic::ppc_altivec_stvx
:
11682 case Intrinsic::ppc_altivec_stvxl
:
11683 case Intrinsic::ppc_vsx_stxvw4x
:
11686 case Intrinsic::ppc_vsx_stxvd2x
:
11689 case Intrinsic::ppc_vsx_stxvw4x_be
:
11692 case Intrinsic::ppc_vsx_stxvd2x_be
:
11695 case Intrinsic::ppc_altivec_stvebx
:
11698 case Intrinsic::ppc_altivec_stvehx
:
11701 case Intrinsic::ppc_altivec_stvewx
:
11706 return isConsecutiveLSLoc(N
->getOperand(3), VT
, Base
, Bytes
, Dist
, DAG
);
11712 // Return true is there is a nearyby consecutive load to the one provided
11713 // (regardless of alignment). We search up and down the chain, looking though
11714 // token factors and other loads (but nothing else). As a result, a true result
11715 // indicates that it is safe to create a new consecutive load adjacent to the
11717 static bool findConsecutiveLoad(LoadSDNode
*LD
, SelectionDAG
&DAG
) {
11718 SDValue Chain
= LD
->getChain();
11719 EVT VT
= LD
->getMemoryVT();
11721 SmallSet
<SDNode
*, 16> LoadRoots
;
11722 SmallVector
<SDNode
*, 8> Queue(1, Chain
.getNode());
11723 SmallSet
<SDNode
*, 16> Visited
;
11725 // First, search up the chain, branching to follow all token-factor operands.
11726 // If we find a consecutive load, then we're done, otherwise, record all
11727 // nodes just above the top-level loads and token factors.
11728 while (!Queue
.empty()) {
11729 SDNode
*ChainNext
= Queue
.pop_back_val();
11730 if (!Visited
.insert(ChainNext
).second
)
11733 if (MemSDNode
*ChainLD
= dyn_cast
<MemSDNode
>(ChainNext
)) {
11734 if (isConsecutiveLS(ChainLD
, LD
, VT
.getStoreSize(), 1, DAG
))
11737 if (!Visited
.count(ChainLD
->getChain().getNode()))
11738 Queue
.push_back(ChainLD
->getChain().getNode());
11739 } else if (ChainNext
->getOpcode() == ISD::TokenFactor
) {
11740 for (const SDUse
&O
: ChainNext
->ops())
11741 if (!Visited
.count(O
.getNode()))
11742 Queue
.push_back(O
.getNode());
11744 LoadRoots
.insert(ChainNext
);
11747 // Second, search down the chain, starting from the top-level nodes recorded
11748 // in the first phase. These top-level nodes are the nodes just above all
11749 // loads and token factors. Starting with their uses, recursively look though
11750 // all loads (just the chain uses) and token factors to find a consecutive
11755 for (SmallSet
<SDNode
*, 16>::iterator I
= LoadRoots
.begin(),
11756 IE
= LoadRoots
.end(); I
!= IE
; ++I
) {
11757 Queue
.push_back(*I
);
11759 while (!Queue
.empty()) {
11760 SDNode
*LoadRoot
= Queue
.pop_back_val();
11761 if (!Visited
.insert(LoadRoot
).second
)
11764 if (MemSDNode
*ChainLD
= dyn_cast
<MemSDNode
>(LoadRoot
))
11765 if (isConsecutiveLS(ChainLD
, LD
, VT
.getStoreSize(), 1, DAG
))
11768 for (SDNode::use_iterator UI
= LoadRoot
->use_begin(),
11769 UE
= LoadRoot
->use_end(); UI
!= UE
; ++UI
)
11770 if (((isa
<MemSDNode
>(*UI
) &&
11771 cast
<MemSDNode
>(*UI
)->getChain().getNode() == LoadRoot
) ||
11772 UI
->getOpcode() == ISD::TokenFactor
) && !Visited
.count(*UI
))
11773 Queue
.push_back(*UI
);
11780 /// This function is called when we have proved that a SETCC node can be replaced
11781 /// by subtraction (and other supporting instructions) so that the result of
11782 /// comparison is kept in a GPR instead of CR. This function is purely for
11783 /// codegen purposes and has some flags to guide the codegen process.
11784 static SDValue
generateEquivalentSub(SDNode
*N
, int Size
, bool Complement
,
11785 bool Swap
, SDLoc
&DL
, SelectionDAG
&DAG
) {
11786 assert(N
->getOpcode() == ISD::SETCC
&& "ISD::SETCC Expected.");
11788 // Zero extend the operands to the largest legal integer. Originally, they
11789 // must be of a strictly smaller size.
11790 auto Op0
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N
->getOperand(0),
11791 DAG
.getConstant(Size
, DL
, MVT::i32
));
11792 auto Op1
= DAG
.getNode(ISD::ZERO_EXTEND
, DL
, MVT::i64
, N
->getOperand(1),
11793 DAG
.getConstant(Size
, DL
, MVT::i32
));
11795 // Swap if needed. Depends on the condition code.
11797 std::swap(Op0
, Op1
);
11799 // Subtract extended integers.
11800 auto SubNode
= DAG
.getNode(ISD::SUB
, DL
, MVT::i64
, Op0
, Op1
);
11802 // Move the sign bit to the least significant position and zero out the rest.
11803 // Now the least significant bit carries the result of original comparison.
11804 auto Shifted
= DAG
.getNode(ISD::SRL
, DL
, MVT::i64
, SubNode
,
11805 DAG
.getConstant(Size
- 1, DL
, MVT::i32
));
11806 auto Final
= Shifted
;
11808 // Complement the result if needed. Based on the condition code.
11810 Final
= DAG
.getNode(ISD::XOR
, DL
, MVT::i64
, Shifted
,
11811 DAG
.getConstant(1, DL
, MVT::i64
));
11813 return DAG
.getNode(ISD::TRUNCATE
, DL
, MVT::i1
, Final
);
11816 SDValue
PPCTargetLowering::ConvertSETCCToSubtract(SDNode
*N
,
11817 DAGCombinerInfo
&DCI
) const {
11818 assert(N
->getOpcode() == ISD::SETCC
&& "ISD::SETCC Expected.");
11820 SelectionDAG
&DAG
= DCI
.DAG
;
11823 // Size of integers being compared has a critical role in the following
11824 // analysis, so we prefer to do this when all types are legal.
11825 if (!DCI
.isAfterLegalizeDAG())
11828 // If all users of SETCC extend its value to a legal integer type
11829 // then we replace SETCC with a subtraction
11830 for (SDNode::use_iterator UI
= N
->use_begin(),
11831 UE
= N
->use_end(); UI
!= UE
; ++UI
) {
11832 if (UI
->getOpcode() != ISD::ZERO_EXTEND
)
11836 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
11837 auto OpSize
= N
->getOperand(0).getValueSizeInBits();
11839 unsigned Size
= DAG
.getDataLayout().getLargestLegalIntTypeSizeInBits();
11841 if (OpSize
< Size
) {
11845 return generateEquivalentSub(N
, Size
, false, false, DL
, DAG
);
11847 return generateEquivalentSub(N
, Size
, true, true, DL
, DAG
);
11849 return generateEquivalentSub(N
, Size
, false, true, DL
, DAG
);
11851 return generateEquivalentSub(N
, Size
, true, false, DL
, DAG
);
11858 SDValue
PPCTargetLowering::DAGCombineTruncBoolExt(SDNode
*N
,
11859 DAGCombinerInfo
&DCI
) const {
11860 SelectionDAG
&DAG
= DCI
.DAG
;
11863 assert(Subtarget
.useCRBits() && "Expecting to be tracking CR bits");
11864 // If we're tracking CR bits, we need to be careful that we don't have:
11865 // trunc(binary-ops(zext(x), zext(y)))
11867 // trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
11868 // such that we're unnecessarily moving things into GPRs when it would be
11869 // better to keep them in CR bits.
11871 // Note that trunc here can be an actual i1 trunc, or can be the effective
11872 // truncation that comes from a setcc or select_cc.
11873 if (N
->getOpcode() == ISD::TRUNCATE
&&
11874 N
->getValueType(0) != MVT::i1
)
11877 if (N
->getOperand(0).getValueType() != MVT::i32
&&
11878 N
->getOperand(0).getValueType() != MVT::i64
)
11881 if (N
->getOpcode() == ISD::SETCC
||
11882 N
->getOpcode() == ISD::SELECT_CC
) {
11883 // If we're looking at a comparison, then we need to make sure that the
11884 // high bits (all except for the first) don't matter the result.
11886 cast
<CondCodeSDNode
>(N
->getOperand(
11887 N
->getOpcode() == ISD::SETCC
? 2 : 4))->get();
11888 unsigned OpBits
= N
->getOperand(0).getValueSizeInBits();
11890 if (ISD::isSignedIntSetCC(CC
)) {
11891 if (DAG
.ComputeNumSignBits(N
->getOperand(0)) != OpBits
||
11892 DAG
.ComputeNumSignBits(N
->getOperand(1)) != OpBits
)
11894 } else if (ISD::isUnsignedIntSetCC(CC
)) {
11895 if (!DAG
.MaskedValueIsZero(N
->getOperand(0),
11896 APInt::getHighBitsSet(OpBits
, OpBits
-1)) ||
11897 !DAG
.MaskedValueIsZero(N
->getOperand(1),
11898 APInt::getHighBitsSet(OpBits
, OpBits
-1)))
11899 return (N
->getOpcode() == ISD::SETCC
? ConvertSETCCToSubtract(N
, DCI
)
11902 // This is neither a signed nor an unsigned comparison, just make sure
11903 // that the high bits are equal.
11904 KnownBits Op1Known
= DAG
.computeKnownBits(N
->getOperand(0));
11905 KnownBits Op2Known
= DAG
.computeKnownBits(N
->getOperand(1));
11907 // We don't really care about what is known about the first bit (if
11908 // anything), so clear it in all masks prior to comparing them.
11909 Op1Known
.Zero
.clearBit(0); Op1Known
.One
.clearBit(0);
11910 Op2Known
.Zero
.clearBit(0); Op2Known
.One
.clearBit(0);
11912 if (Op1Known
.Zero
!= Op2Known
.Zero
|| Op1Known
.One
!= Op2Known
.One
)
11917 // We now know that the higher-order bits are irrelevant, we just need to
11918 // make sure that all of the intermediate operations are bit operations, and
11919 // all inputs are extensions.
11920 if (N
->getOperand(0).getOpcode() != ISD::AND
&&
11921 N
->getOperand(0).getOpcode() != ISD::OR
&&
11922 N
->getOperand(0).getOpcode() != ISD::XOR
&&
11923 N
->getOperand(0).getOpcode() != ISD::SELECT
&&
11924 N
->getOperand(0).getOpcode() != ISD::SELECT_CC
&&
11925 N
->getOperand(0).getOpcode() != ISD::TRUNCATE
&&
11926 N
->getOperand(0).getOpcode() != ISD::SIGN_EXTEND
&&
11927 N
->getOperand(0).getOpcode() != ISD::ZERO_EXTEND
&&
11928 N
->getOperand(0).getOpcode() != ISD::ANY_EXTEND
)
11931 if ((N
->getOpcode() == ISD::SETCC
|| N
->getOpcode() == ISD::SELECT_CC
) &&
11932 N
->getOperand(1).getOpcode() != ISD::AND
&&
11933 N
->getOperand(1).getOpcode() != ISD::OR
&&
11934 N
->getOperand(1).getOpcode() != ISD::XOR
&&
11935 N
->getOperand(1).getOpcode() != ISD::SELECT
&&
11936 N
->getOperand(1).getOpcode() != ISD::SELECT_CC
&&
11937 N
->getOperand(1).getOpcode() != ISD::TRUNCATE
&&
11938 N
->getOperand(1).getOpcode() != ISD::SIGN_EXTEND
&&
11939 N
->getOperand(1).getOpcode() != ISD::ZERO_EXTEND
&&
11940 N
->getOperand(1).getOpcode() != ISD::ANY_EXTEND
)
11943 SmallVector
<SDValue
, 4> Inputs
;
11944 SmallVector
<SDValue
, 8> BinOps
, PromOps
;
11945 SmallPtrSet
<SDNode
*, 16> Visited
;
11947 for (unsigned i
= 0; i
< 2; ++i
) {
11948 if (((N
->getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11949 N
->getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11950 N
->getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) &&
11951 N
->getOperand(i
).getOperand(0).getValueType() == MVT::i1
) ||
11952 isa
<ConstantSDNode
>(N
->getOperand(i
)))
11953 Inputs
.push_back(N
->getOperand(i
));
11955 BinOps
.push_back(N
->getOperand(i
));
11957 if (N
->getOpcode() == ISD::TRUNCATE
)
11961 // Visit all inputs, collect all binary operations (and, or, xor and
11962 // select) that are all fed by extensions.
11963 while (!BinOps
.empty()) {
11964 SDValue BinOp
= BinOps
.back();
11967 if (!Visited
.insert(BinOp
.getNode()).second
)
11970 PromOps
.push_back(BinOp
);
11972 for (unsigned i
= 0, ie
= BinOp
.getNumOperands(); i
!= ie
; ++i
) {
11973 // The condition of the select is not promoted.
11974 if (BinOp
.getOpcode() == ISD::SELECT
&& i
== 0)
11976 if (BinOp
.getOpcode() == ISD::SELECT_CC
&& i
!= 2 && i
!= 3)
11979 if (((BinOp
.getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11980 BinOp
.getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11981 BinOp
.getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) &&
11982 BinOp
.getOperand(i
).getOperand(0).getValueType() == MVT::i1
) ||
11983 isa
<ConstantSDNode
>(BinOp
.getOperand(i
))) {
11984 Inputs
.push_back(BinOp
.getOperand(i
));
11985 } else if (BinOp
.getOperand(i
).getOpcode() == ISD::AND
||
11986 BinOp
.getOperand(i
).getOpcode() == ISD::OR
||
11987 BinOp
.getOperand(i
).getOpcode() == ISD::XOR
||
11988 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT
||
11989 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT_CC
||
11990 BinOp
.getOperand(i
).getOpcode() == ISD::TRUNCATE
||
11991 BinOp
.getOperand(i
).getOpcode() == ISD::SIGN_EXTEND
||
11992 BinOp
.getOperand(i
).getOpcode() == ISD::ZERO_EXTEND
||
11993 BinOp
.getOperand(i
).getOpcode() == ISD::ANY_EXTEND
) {
11994 BinOps
.push_back(BinOp
.getOperand(i
));
11996 // We have an input that is not an extension or another binary
11997 // operation; we'll abort this transformation.
12003 // Make sure that this is a self-contained cluster of operations (which
12004 // is not quite the same thing as saying that everything has only one
12006 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12007 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12010 for (SDNode::use_iterator UI
= Inputs
[i
].getNode()->use_begin(),
12011 UE
= Inputs
[i
].getNode()->use_end();
12013 SDNode
*User
= *UI
;
12014 if (User
!= N
&& !Visited
.count(User
))
12017 // Make sure that we're not going to promote the non-output-value
12018 // operand(s) or SELECT or SELECT_CC.
12019 // FIXME: Although we could sometimes handle this, and it does occur in
12020 // practice that one of the condition inputs to the select is also one of
12021 // the outputs, we currently can't deal with this.
12022 if (User
->getOpcode() == ISD::SELECT
) {
12023 if (User
->getOperand(0) == Inputs
[i
])
12025 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12026 if (User
->getOperand(0) == Inputs
[i
] ||
12027 User
->getOperand(1) == Inputs
[i
])
12033 for (unsigned i
= 0, ie
= PromOps
.size(); i
!= ie
; ++i
) {
12034 for (SDNode::use_iterator UI
= PromOps
[i
].getNode()->use_begin(),
12035 UE
= PromOps
[i
].getNode()->use_end();
12037 SDNode
*User
= *UI
;
12038 if (User
!= N
&& !Visited
.count(User
))
12041 // Make sure that we're not going to promote the non-output-value
12042 // operand(s) or SELECT or SELECT_CC.
12043 // FIXME: Although we could sometimes handle this, and it does occur in
12044 // practice that one of the condition inputs to the select is also one of
12045 // the outputs, we currently can't deal with this.
12046 if (User
->getOpcode() == ISD::SELECT
) {
12047 if (User
->getOperand(0) == PromOps
[i
])
12049 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12050 if (User
->getOperand(0) == PromOps
[i
] ||
12051 User
->getOperand(1) == PromOps
[i
])
12057 // Replace all inputs with the extension operand.
12058 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12059 // Constants may have users outside the cluster of to-be-promoted nodes,
12060 // and so we need to replace those as we do the promotions.
12061 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12064 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
], Inputs
[i
].getOperand(0));
12067 std::list
<HandleSDNode
> PromOpHandles
;
12068 for (auto &PromOp
: PromOps
)
12069 PromOpHandles
.emplace_back(PromOp
);
12071 // Replace all operations (these are all the same, but have a different
12072 // (i1) return type). DAG.getNode will validate that the types of
12073 // a binary operator match, so go through the list in reverse so that
12074 // we've likely promoted both operands first. Any intermediate truncations or
12075 // extensions disappear.
12076 while (!PromOpHandles
.empty()) {
12077 SDValue PromOp
= PromOpHandles
.back().getValue();
12078 PromOpHandles
.pop_back();
12080 if (PromOp
.getOpcode() == ISD::TRUNCATE
||
12081 PromOp
.getOpcode() == ISD::SIGN_EXTEND
||
12082 PromOp
.getOpcode() == ISD::ZERO_EXTEND
||
12083 PromOp
.getOpcode() == ISD::ANY_EXTEND
) {
12084 if (!isa
<ConstantSDNode
>(PromOp
.getOperand(0)) &&
12085 PromOp
.getOperand(0).getValueType() != MVT::i1
) {
12086 // The operand is not yet ready (see comment below).
12087 PromOpHandles
.emplace_front(PromOp
);
12091 SDValue RepValue
= PromOp
.getOperand(0);
12092 if (isa
<ConstantSDNode
>(RepValue
))
12093 RepValue
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, RepValue
);
12095 DAG
.ReplaceAllUsesOfValueWith(PromOp
, RepValue
);
12100 switch (PromOp
.getOpcode()) {
12101 default: C
= 0; break;
12102 case ISD::SELECT
: C
= 1; break;
12103 case ISD::SELECT_CC
: C
= 2; break;
12106 if ((!isa
<ConstantSDNode
>(PromOp
.getOperand(C
)) &&
12107 PromOp
.getOperand(C
).getValueType() != MVT::i1
) ||
12108 (!isa
<ConstantSDNode
>(PromOp
.getOperand(C
+1)) &&
12109 PromOp
.getOperand(C
+1).getValueType() != MVT::i1
)) {
12110 // The to-be-promoted operands of this node have not yet been
12111 // promoted (this should be rare because we're going through the
12112 // list backward, but if one of the operands has several users in
12113 // this cluster of to-be-promoted nodes, it is possible).
12114 PromOpHandles
.emplace_front(PromOp
);
12118 SmallVector
<SDValue
, 3> Ops(PromOp
.getNode()->op_begin(),
12119 PromOp
.getNode()->op_end());
12121 // If there are any constant inputs, make sure they're replaced now.
12122 for (unsigned i
= 0; i
< 2; ++i
)
12123 if (isa
<ConstantSDNode
>(Ops
[C
+i
]))
12124 Ops
[C
+i
] = DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i1
, Ops
[C
+i
]);
12126 DAG
.ReplaceAllUsesOfValueWith(PromOp
,
12127 DAG
.getNode(PromOp
.getOpcode(), dl
, MVT::i1
, Ops
));
12130 // Now we're left with the initial truncation itself.
12131 if (N
->getOpcode() == ISD::TRUNCATE
)
12132 return N
->getOperand(0);
12134 // Otherwise, this is a comparison. The operands to be compared have just
12135 // changed type (to i1), but everything else is the same.
12136 return SDValue(N
, 0);
12139 SDValue
PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode
*N
,
12140 DAGCombinerInfo
&DCI
) const {
12141 SelectionDAG
&DAG
= DCI
.DAG
;
12144 // If we're tracking CR bits, we need to be careful that we don't have:
12145 // zext(binary-ops(trunc(x), trunc(y)))
12147 // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
12148 // such that we're unnecessarily moving things into CR bits that can more
12149 // efficiently stay in GPRs. Note that if we're not certain that the high
12150 // bits are set as required by the final extension, we still may need to do
12151 // some masking to get the proper behavior.
12153 // This same functionality is important on PPC64 when dealing with
12154 // 32-to-64-bit extensions; these occur often when 32-bit values are used as
12155 // the return values of functions. Because it is so similar, it is handled
12158 if (N
->getValueType(0) != MVT::i32
&&
12159 N
->getValueType(0) != MVT::i64
)
12162 if (!((N
->getOperand(0).getValueType() == MVT::i1
&& Subtarget
.useCRBits()) ||
12163 (N
->getOperand(0).getValueType() == MVT::i32
&& Subtarget
.isPPC64())))
12166 if (N
->getOperand(0).getOpcode() != ISD::AND
&&
12167 N
->getOperand(0).getOpcode() != ISD::OR
&&
12168 N
->getOperand(0).getOpcode() != ISD::XOR
&&
12169 N
->getOperand(0).getOpcode() != ISD::SELECT
&&
12170 N
->getOperand(0).getOpcode() != ISD::SELECT_CC
)
12173 SmallVector
<SDValue
, 4> Inputs
;
12174 SmallVector
<SDValue
, 8> BinOps(1, N
->getOperand(0)), PromOps
;
12175 SmallPtrSet
<SDNode
*, 16> Visited
;
12177 // Visit all inputs, collect all binary operations (and, or, xor and
12178 // select) that are all fed by truncations.
12179 while (!BinOps
.empty()) {
12180 SDValue BinOp
= BinOps
.back();
12183 if (!Visited
.insert(BinOp
.getNode()).second
)
12186 PromOps
.push_back(BinOp
);
12188 for (unsigned i
= 0, ie
= BinOp
.getNumOperands(); i
!= ie
; ++i
) {
12189 // The condition of the select is not promoted.
12190 if (BinOp
.getOpcode() == ISD::SELECT
&& i
== 0)
12192 if (BinOp
.getOpcode() == ISD::SELECT_CC
&& i
!= 2 && i
!= 3)
12195 if (BinOp
.getOperand(i
).getOpcode() == ISD::TRUNCATE
||
12196 isa
<ConstantSDNode
>(BinOp
.getOperand(i
))) {
12197 Inputs
.push_back(BinOp
.getOperand(i
));
12198 } else if (BinOp
.getOperand(i
).getOpcode() == ISD::AND
||
12199 BinOp
.getOperand(i
).getOpcode() == ISD::OR
||
12200 BinOp
.getOperand(i
).getOpcode() == ISD::XOR
||
12201 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT
||
12202 BinOp
.getOperand(i
).getOpcode() == ISD::SELECT_CC
) {
12203 BinOps
.push_back(BinOp
.getOperand(i
));
12205 // We have an input that is not a truncation or another binary
12206 // operation; we'll abort this transformation.
12212 // The operands of a select that must be truncated when the select is
12213 // promoted because the operand is actually part of the to-be-promoted set.
12214 DenseMap
<SDNode
*, EVT
> SelectTruncOp
[2];
12216 // Make sure that this is a self-contained cluster of operations (which
12217 // is not quite the same thing as saying that everything has only one
12219 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12220 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12223 for (SDNode::use_iterator UI
= Inputs
[i
].getNode()->use_begin(),
12224 UE
= Inputs
[i
].getNode()->use_end();
12226 SDNode
*User
= *UI
;
12227 if (User
!= N
&& !Visited
.count(User
))
12230 // If we're going to promote the non-output-value operand(s) or SELECT or
12231 // SELECT_CC, record them for truncation.
12232 if (User
->getOpcode() == ISD::SELECT
) {
12233 if (User
->getOperand(0) == Inputs
[i
])
12234 SelectTruncOp
[0].insert(std::make_pair(User
,
12235 User
->getOperand(0).getValueType()));
12236 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12237 if (User
->getOperand(0) == Inputs
[i
])
12238 SelectTruncOp
[0].insert(std::make_pair(User
,
12239 User
->getOperand(0).getValueType()));
12240 if (User
->getOperand(1) == Inputs
[i
])
12241 SelectTruncOp
[1].insert(std::make_pair(User
,
12242 User
->getOperand(1).getValueType()));
12247 for (unsigned i
= 0, ie
= PromOps
.size(); i
!= ie
; ++i
) {
12248 for (SDNode::use_iterator UI
= PromOps
[i
].getNode()->use_begin(),
12249 UE
= PromOps
[i
].getNode()->use_end();
12251 SDNode
*User
= *UI
;
12252 if (User
!= N
&& !Visited
.count(User
))
12255 // If we're going to promote the non-output-value operand(s) or SELECT or
12256 // SELECT_CC, record them for truncation.
12257 if (User
->getOpcode() == ISD::SELECT
) {
12258 if (User
->getOperand(0) == PromOps
[i
])
12259 SelectTruncOp
[0].insert(std::make_pair(User
,
12260 User
->getOperand(0).getValueType()));
12261 } else if (User
->getOpcode() == ISD::SELECT_CC
) {
12262 if (User
->getOperand(0) == PromOps
[i
])
12263 SelectTruncOp
[0].insert(std::make_pair(User
,
12264 User
->getOperand(0).getValueType()));
12265 if (User
->getOperand(1) == PromOps
[i
])
12266 SelectTruncOp
[1].insert(std::make_pair(User
,
12267 User
->getOperand(1).getValueType()));
12272 unsigned PromBits
= N
->getOperand(0).getValueSizeInBits();
12273 bool ReallyNeedsExt
= false;
12274 if (N
->getOpcode() != ISD::ANY_EXTEND
) {
12275 // If all of the inputs are not already sign/zero extended, then
12276 // we'll still need to do that at the end.
12277 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12278 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12282 Inputs
[i
].getOperand(0).getValueSizeInBits();
12283 assert(PromBits
< OpBits
&& "Truncation not to a smaller bit count?");
12285 if ((N
->getOpcode() == ISD::ZERO_EXTEND
&&
12286 !DAG
.MaskedValueIsZero(Inputs
[i
].getOperand(0),
12287 APInt::getHighBitsSet(OpBits
,
12288 OpBits
-PromBits
))) ||
12289 (N
->getOpcode() == ISD::SIGN_EXTEND
&&
12290 DAG
.ComputeNumSignBits(Inputs
[i
].getOperand(0)) <
12291 (OpBits
-(PromBits
-1)))) {
12292 ReallyNeedsExt
= true;
12298 // Replace all inputs, either with the truncation operand, or a
12299 // truncation or extension to the final output type.
12300 for (unsigned i
= 0, ie
= Inputs
.size(); i
!= ie
; ++i
) {
12301 // Constant inputs need to be replaced with the to-be-promoted nodes that
12302 // use them because they might have users outside of the cluster of
12304 if (isa
<ConstantSDNode
>(Inputs
[i
]))
12307 SDValue InSrc
= Inputs
[i
].getOperand(0);
12308 if (Inputs
[i
].getValueType() == N
->getValueType(0))
12309 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
], InSrc
);
12310 else if (N
->getOpcode() == ISD::SIGN_EXTEND
)
12311 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12312 DAG
.getSExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12313 else if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12314 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12315 DAG
.getZExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12317 DAG
.ReplaceAllUsesOfValueWith(Inputs
[i
],
12318 DAG
.getAnyExtOrTrunc(InSrc
, dl
, N
->getValueType(0)));
12321 std::list
<HandleSDNode
> PromOpHandles
;
12322 for (auto &PromOp
: PromOps
)
12323 PromOpHandles
.emplace_back(PromOp
);
12325 // Replace all operations (these are all the same, but have a different
12326 // (promoted) return type). DAG.getNode will validate that the types of
12327 // a binary operator match, so go through the list in reverse so that
12328 // we've likely promoted both operands first.
12329 while (!PromOpHandles
.empty()) {
12330 SDValue PromOp
= PromOpHandles
.back().getValue();
12331 PromOpHandles
.pop_back();
12334 switch (PromOp
.getOpcode()) {
12335 default: C
= 0; break;
12336 case ISD::SELECT
: C
= 1; break;
12337 case ISD::SELECT_CC
: C
= 2; break;
12340 if ((!isa
<ConstantSDNode
>(PromOp
.getOperand(C
)) &&
12341 PromOp
.getOperand(C
).getValueType() != N
->getValueType(0)) ||
12342 (!isa
<ConstantSDNode
>(PromOp
.getOperand(C
+1)) &&
12343 PromOp
.getOperand(C
+1).getValueType() != N
->getValueType(0))) {
12344 // The to-be-promoted operands of this node have not yet been
12345 // promoted (this should be rare because we're going through the
12346 // list backward, but if one of the operands has several users in
12347 // this cluster of to-be-promoted nodes, it is possible).
12348 PromOpHandles
.emplace_front(PromOp
);
12352 // For SELECT and SELECT_CC nodes, we do a similar check for any
12353 // to-be-promoted comparison inputs.
12354 if (PromOp
.getOpcode() == ISD::SELECT
||
12355 PromOp
.getOpcode() == ISD::SELECT_CC
) {
12356 if ((SelectTruncOp
[0].count(PromOp
.getNode()) &&
12357 PromOp
.getOperand(0).getValueType() != N
->getValueType(0)) ||
12358 (SelectTruncOp
[1].count(PromOp
.getNode()) &&
12359 PromOp
.getOperand(1).getValueType() != N
->getValueType(0))) {
12360 PromOpHandles
.emplace_front(PromOp
);
12365 SmallVector
<SDValue
, 3> Ops(PromOp
.getNode()->op_begin(),
12366 PromOp
.getNode()->op_end());
12368 // If this node has constant inputs, then they'll need to be promoted here.
12369 for (unsigned i
= 0; i
< 2; ++i
) {
12370 if (!isa
<ConstantSDNode
>(Ops
[C
+i
]))
12372 if (Ops
[C
+i
].getValueType() == N
->getValueType(0))
12375 if (N
->getOpcode() == ISD::SIGN_EXTEND
)
12376 Ops
[C
+i
] = DAG
.getSExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12377 else if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12378 Ops
[C
+i
] = DAG
.getZExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12380 Ops
[C
+i
] = DAG
.getAnyExtOrTrunc(Ops
[C
+i
], dl
, N
->getValueType(0));
12383 // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
12384 // truncate them again to the original value type.
12385 if (PromOp
.getOpcode() == ISD::SELECT
||
12386 PromOp
.getOpcode() == ISD::SELECT_CC
) {
12387 auto SI0
= SelectTruncOp
[0].find(PromOp
.getNode());
12388 if (SI0
!= SelectTruncOp
[0].end())
12389 Ops
[0] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI0
->second
, Ops
[0]);
12390 auto SI1
= SelectTruncOp
[1].find(PromOp
.getNode());
12391 if (SI1
!= SelectTruncOp
[1].end())
12392 Ops
[1] = DAG
.getNode(ISD::TRUNCATE
, dl
, SI1
->second
, Ops
[1]);
12395 DAG
.ReplaceAllUsesOfValueWith(PromOp
,
12396 DAG
.getNode(PromOp
.getOpcode(), dl
, N
->getValueType(0), Ops
));
12399 // Now we're left with the initial extension itself.
12400 if (!ReallyNeedsExt
)
12401 return N
->getOperand(0);
12403 // To zero extend, just mask off everything except for the first bit (in the
12405 if (N
->getOpcode() == ISD::ZERO_EXTEND
)
12406 return DAG
.getNode(ISD::AND
, dl
, N
->getValueType(0), N
->getOperand(0),
12407 DAG
.getConstant(APInt::getLowBitsSet(
12408 N
->getValueSizeInBits(0), PromBits
),
12409 dl
, N
->getValueType(0)));
12411 assert(N
->getOpcode() == ISD::SIGN_EXTEND
&&
12412 "Invalid extension type");
12413 EVT ShiftAmountTy
= getShiftAmountTy(N
->getValueType(0), DAG
.getDataLayout());
12415 DAG
.getConstant(N
->getValueSizeInBits(0) - PromBits
, dl
, ShiftAmountTy
);
12416 return DAG
.getNode(
12417 ISD::SRA
, dl
, N
->getValueType(0),
12418 DAG
.getNode(ISD::SHL
, dl
, N
->getValueType(0), N
->getOperand(0), ShiftCst
),
12422 SDValue
PPCTargetLowering::combineSetCC(SDNode
*N
,
12423 DAGCombinerInfo
&DCI
) const {
12424 assert(N
->getOpcode() == ISD::SETCC
&&
12425 "Should be called with a SETCC node");
12427 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(2))->get();
12428 if (CC
== ISD::SETNE
|| CC
== ISD::SETEQ
) {
12429 SDValue LHS
= N
->getOperand(0);
12430 SDValue RHS
= N
->getOperand(1);
12432 // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
12433 if (LHS
.getOpcode() == ISD::SUB
&& isNullConstant(LHS
.getOperand(0)) &&
12435 std::swap(LHS
, RHS
);
12437 // x == 0-y --> x+y == 0
12438 // x != 0-y --> x+y != 0
12439 if (RHS
.getOpcode() == ISD::SUB
&& isNullConstant(RHS
.getOperand(0)) &&
12442 SelectionDAG
&DAG
= DCI
.DAG
;
12443 EVT VT
= N
->getValueType(0);
12444 EVT OpVT
= LHS
.getValueType();
12445 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, OpVT
, LHS
, RHS
.getOperand(1));
12446 return DAG
.getSetCC(DL
, VT
, Add
, DAG
.getConstant(0, DL
, OpVT
), CC
);
12450 return DAGCombineTruncBoolExt(N
, DCI
);
12453 // Is this an extending load from an f32 to an f64?
12454 static bool isFPExtLoad(SDValue Op
) {
12455 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Op
.getNode()))
12456 return LD
->getExtensionType() == ISD::EXTLOAD
&&
12457 Op
.getValueType() == MVT::f64
;
12461 /// Reduces the number of fp-to-int conversion when building a vector.
12463 /// If this vector is built out of floating to integer conversions,
12464 /// transform it to a vector built out of floating point values followed by a
12465 /// single floating to integer conversion of the vector.
12466 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
12467 /// becomes (fptosi (build_vector ($A, $B, ...)))
12468 SDValue
PPCTargetLowering::
12469 combineElementTruncationToVectorTruncation(SDNode
*N
,
12470 DAGCombinerInfo
&DCI
) const {
12471 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12472 "Should be called with a BUILD_VECTOR node");
12474 SelectionDAG
&DAG
= DCI
.DAG
;
12477 SDValue FirstInput
= N
->getOperand(0);
12478 assert(FirstInput
.getOpcode() == PPCISD::MFVSR
&&
12479 "The input operand must be an fp-to-int conversion.");
12481 // This combine happens after legalization so the fp_to_[su]i nodes are
12482 // already converted to PPCSISD nodes.
12483 unsigned FirstConversion
= FirstInput
.getOperand(0).getOpcode();
12484 if (FirstConversion
== PPCISD::FCTIDZ
||
12485 FirstConversion
== PPCISD::FCTIDUZ
||
12486 FirstConversion
== PPCISD::FCTIWZ
||
12487 FirstConversion
== PPCISD::FCTIWUZ
) {
12488 bool IsSplat
= true;
12489 bool Is32Bit
= FirstConversion
== PPCISD::FCTIWZ
||
12490 FirstConversion
== PPCISD::FCTIWUZ
;
12491 EVT SrcVT
= FirstInput
.getOperand(0).getValueType();
12492 SmallVector
<SDValue
, 4> Ops
;
12493 EVT TargetVT
= N
->getValueType(0);
12494 for (int i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
) {
12495 SDValue NextOp
= N
->getOperand(i
);
12496 if (NextOp
.getOpcode() != PPCISD::MFVSR
)
12498 unsigned NextConversion
= NextOp
.getOperand(0).getOpcode();
12499 if (NextConversion
!= FirstConversion
)
12501 // If we are converting to 32-bit integers, we need to add an FP_ROUND.
12502 // This is not valid if the input was originally double precision. It is
12503 // also not profitable to do unless this is an extending load in which
12504 // case doing this combine will allow us to combine consecutive loads.
12505 if (Is32Bit
&& !isFPExtLoad(NextOp
.getOperand(0).getOperand(0)))
12507 if (N
->getOperand(i
) != FirstInput
)
12511 // If this is a splat, we leave it as-is since there will be only a single
12512 // fp-to-int conversion followed by a splat of the integer. This is better
12513 // for 32-bit and smaller ints and neutral for 64-bit ints.
12517 // Now that we know we have the right type of node, get its operands
12518 for (int i
= 0, e
= N
->getNumOperands(); i
< e
; ++i
) {
12519 SDValue In
= N
->getOperand(i
).getOperand(0);
12521 // For 32-bit values, we need to add an FP_ROUND node (if we made it
12522 // here, we know that all inputs are extending loads so this is safe).
12524 Ops
.push_back(DAG
.getUNDEF(SrcVT
));
12526 SDValue Trunc
= DAG
.getNode(ISD::FP_ROUND
, dl
,
12527 MVT::f32
, In
.getOperand(0),
12528 DAG
.getIntPtrConstant(1, dl
));
12529 Ops
.push_back(Trunc
);
12532 Ops
.push_back(In
.isUndef() ? DAG
.getUNDEF(SrcVT
) : In
.getOperand(0));
12536 if (FirstConversion
== PPCISD::FCTIDZ
||
12537 FirstConversion
== PPCISD::FCTIWZ
)
12538 Opcode
= ISD::FP_TO_SINT
;
12540 Opcode
= ISD::FP_TO_UINT
;
12542 EVT NewVT
= TargetVT
== MVT::v2i64
? MVT::v2f64
: MVT::v4f32
;
12543 SDValue BV
= DAG
.getBuildVector(NewVT
, dl
, Ops
);
12544 return DAG
.getNode(Opcode
, dl
, TargetVT
, BV
);
12549 /// Reduce the number of loads when building a vector.
12551 /// Building a vector out of multiple loads can be converted to a load
12552 /// of the vector type if the loads are consecutive. If the loads are
12553 /// consecutive but in descending order, a shuffle is added at the end
12554 /// to reorder the vector.
12555 static SDValue
combineBVOfConsecutiveLoads(SDNode
*N
, SelectionDAG
&DAG
) {
12556 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12557 "Should be called with a BUILD_VECTOR node");
12561 // Return early for non byte-sized type, as they can't be consecutive.
12562 if (!N
->getValueType(0).getVectorElementType().isByteSized())
12565 bool InputsAreConsecutiveLoads
= true;
12566 bool InputsAreReverseConsecutive
= true;
12567 unsigned ElemSize
= N
->getValueType(0).getScalarType().getStoreSize();
12568 SDValue FirstInput
= N
->getOperand(0);
12569 bool IsRoundOfExtLoad
= false;
12571 if (FirstInput
.getOpcode() == ISD::FP_ROUND
&&
12572 FirstInput
.getOperand(0).getOpcode() == ISD::LOAD
) {
12573 LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(FirstInput
.getOperand(0));
12574 IsRoundOfExtLoad
= LD
->getExtensionType() == ISD::EXTLOAD
;
12576 // Not a build vector of (possibly fp_rounded) loads.
12577 if ((!IsRoundOfExtLoad
&& FirstInput
.getOpcode() != ISD::LOAD
) ||
12578 N
->getNumOperands() == 1)
12581 for (int i
= 1, e
= N
->getNumOperands(); i
< e
; ++i
) {
12582 // If any inputs are fp_round(extload), they all must be.
12583 if (IsRoundOfExtLoad
&& N
->getOperand(i
).getOpcode() != ISD::FP_ROUND
)
12586 SDValue NextInput
= IsRoundOfExtLoad
? N
->getOperand(i
).getOperand(0) :
12588 if (NextInput
.getOpcode() != ISD::LOAD
)
12591 SDValue PreviousInput
=
12592 IsRoundOfExtLoad
? N
->getOperand(i
-1).getOperand(0) : N
->getOperand(i
-1);
12593 LoadSDNode
*LD1
= dyn_cast
<LoadSDNode
>(PreviousInput
);
12594 LoadSDNode
*LD2
= dyn_cast
<LoadSDNode
>(NextInput
);
12596 // If any inputs are fp_round(extload), they all must be.
12597 if (IsRoundOfExtLoad
&& LD2
->getExtensionType() != ISD::EXTLOAD
)
12600 if (!isConsecutiveLS(LD2
, LD1
, ElemSize
, 1, DAG
))
12601 InputsAreConsecutiveLoads
= false;
12602 if (!isConsecutiveLS(LD1
, LD2
, ElemSize
, 1, DAG
))
12603 InputsAreReverseConsecutive
= false;
12605 // Exit early if the loads are neither consecutive nor reverse consecutive.
12606 if (!InputsAreConsecutiveLoads
&& !InputsAreReverseConsecutive
)
12610 assert(!(InputsAreConsecutiveLoads
&& InputsAreReverseConsecutive
) &&
12611 "The loads cannot be both consecutive and reverse consecutive.");
12613 SDValue FirstLoadOp
=
12614 IsRoundOfExtLoad
? FirstInput
.getOperand(0) : FirstInput
;
12615 SDValue LastLoadOp
=
12616 IsRoundOfExtLoad
? N
->getOperand(N
->getNumOperands()-1).getOperand(0) :
12617 N
->getOperand(N
->getNumOperands()-1);
12619 LoadSDNode
*LD1
= dyn_cast
<LoadSDNode
>(FirstLoadOp
);
12620 LoadSDNode
*LDL
= dyn_cast
<LoadSDNode
>(LastLoadOp
);
12621 if (InputsAreConsecutiveLoads
) {
12622 assert(LD1
&& "Input needs to be a LoadSDNode.");
12623 return DAG
.getLoad(N
->getValueType(0), dl
, LD1
->getChain(),
12624 LD1
->getBasePtr(), LD1
->getPointerInfo(),
12625 LD1
->getAlignment());
12627 if (InputsAreReverseConsecutive
) {
12628 assert(LDL
&& "Input needs to be a LoadSDNode.");
12629 SDValue Load
= DAG
.getLoad(N
->getValueType(0), dl
, LDL
->getChain(),
12630 LDL
->getBasePtr(), LDL
->getPointerInfo(),
12631 LDL
->getAlignment());
12632 SmallVector
<int, 16> Ops
;
12633 for (int i
= N
->getNumOperands() - 1; i
>= 0; i
--)
12636 return DAG
.getVectorShuffle(N
->getValueType(0), dl
, Load
,
12637 DAG
.getUNDEF(N
->getValueType(0)), Ops
);
12642 // This function adds the required vector_shuffle needed to get
12643 // the elements of the vector extract in the correct position
12644 // as specified by the CorrectElems encoding.
12645 static SDValue
addShuffleForVecExtend(SDNode
*N
, SelectionDAG
&DAG
,
12646 SDValue Input
, uint64_t Elems
,
12647 uint64_t CorrectElems
) {
12650 unsigned NumElems
= Input
.getValueType().getVectorNumElements();
12651 SmallVector
<int, 16> ShuffleMask(NumElems
, -1);
12653 // Knowing the element indices being extracted from the original
12654 // vector and the order in which they're being inserted, just put
12655 // them at element indices required for the instruction.
12656 for (unsigned i
= 0; i
< N
->getNumOperands(); i
++) {
12657 if (DAG
.getDataLayout().isLittleEndian())
12658 ShuffleMask
[CorrectElems
& 0xF] = Elems
& 0xF;
12660 ShuffleMask
[(CorrectElems
& 0xF0) >> 4] = (Elems
& 0xF0) >> 4;
12661 CorrectElems
= CorrectElems
>> 8;
12662 Elems
= Elems
>> 8;
12666 DAG
.getVectorShuffle(Input
.getValueType(), dl
, Input
,
12667 DAG
.getUNDEF(Input
.getValueType()), ShuffleMask
);
12669 EVT Ty
= N
->getValueType(0);
12670 SDValue BV
= DAG
.getNode(PPCISD::SExtVElems
, dl
, Ty
, Shuffle
);
12674 // Look for build vector patterns where input operands come from sign
12675 // extended vector_extract elements of specific indices. If the correct indices
12676 // aren't used, add a vector shuffle to fix up the indices and create a new
12677 // PPCISD:SExtVElems node which selects the vector sign extend instructions
12678 // during instruction selection.
12679 static SDValue
combineBVOfVecSExt(SDNode
*N
, SelectionDAG
&DAG
) {
12680 // This array encodes the indices that the vector sign extend instructions
12681 // extract from when extending from one type to another for both BE and LE.
12682 // The right nibble of each byte corresponds to the LE incides.
12683 // and the left nibble of each byte corresponds to the BE incides.
12684 // For example: 0x3074B8FC byte->word
12685 // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
12686 // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
12687 // For example: 0x000070F8 byte->double word
12688 // For LE: the allowed indices are: 0x0,0x8
12689 // For BE: the allowed indices are: 0x7,0xF
12690 uint64_t TargetElems
[] = {
12691 0x3074B8FC, // b->w
12692 0x000070F8, // b->d
12693 0x10325476, // h->w
12694 0x00003074, // h->d
12695 0x00001032, // w->d
12698 uint64_t Elems
= 0;
12702 auto isSExtOfVecExtract
= [&](SDValue Op
) -> bool {
12705 if (Op
.getOpcode() != ISD::SIGN_EXTEND
&&
12706 Op
.getOpcode() != ISD::SIGN_EXTEND_INREG
)
12709 // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
12710 // of the right width.
12711 SDValue Extract
= Op
.getOperand(0);
12712 if (Extract
.getOpcode() == ISD::ANY_EXTEND
)
12713 Extract
= Extract
.getOperand(0);
12714 if (Extract
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
12717 ConstantSDNode
*ExtOp
= dyn_cast
<ConstantSDNode
>(Extract
.getOperand(1));
12721 Index
= ExtOp
->getZExtValue();
12722 if (Input
&& Input
!= Extract
.getOperand(0))
12726 Input
= Extract
.getOperand(0);
12728 Elems
= Elems
<< 8;
12729 Index
= DAG
.getDataLayout().isLittleEndian() ? Index
: Index
<< 4;
12735 // If the build vector operands aren't sign extended vector extracts,
12736 // of the same input vector, then return.
12737 for (unsigned i
= 0; i
< N
->getNumOperands(); i
++) {
12738 if (!isSExtOfVecExtract(N
->getOperand(i
))) {
12743 // If the vector extract indicies are not correct, add the appropriate
12745 int TgtElemArrayIdx
;
12746 int InputSize
= Input
.getValueType().getScalarSizeInBits();
12747 int OutputSize
= N
->getValueType(0).getScalarSizeInBits();
12748 if (InputSize
+ OutputSize
== 40)
12749 TgtElemArrayIdx
= 0;
12750 else if (InputSize
+ OutputSize
== 72)
12751 TgtElemArrayIdx
= 1;
12752 else if (InputSize
+ OutputSize
== 48)
12753 TgtElemArrayIdx
= 2;
12754 else if (InputSize
+ OutputSize
== 80)
12755 TgtElemArrayIdx
= 3;
12756 else if (InputSize
+ OutputSize
== 96)
12757 TgtElemArrayIdx
= 4;
12761 uint64_t CorrectElems
= TargetElems
[TgtElemArrayIdx
];
12762 CorrectElems
= DAG
.getDataLayout().isLittleEndian()
12763 ? CorrectElems
& 0x0F0F0F0F0F0F0F0F
12764 : CorrectElems
& 0xF0F0F0F0F0F0F0F0;
12765 if (Elems
!= CorrectElems
) {
12766 return addShuffleForVecExtend(N
, DAG
, Input
, Elems
, CorrectElems
);
12769 // Regular lowering will catch cases where a shuffle is not needed.
12773 SDValue
PPCTargetLowering::DAGCombineBuildVector(SDNode
*N
,
12774 DAGCombinerInfo
&DCI
) const {
12775 assert(N
->getOpcode() == ISD::BUILD_VECTOR
&&
12776 "Should be called with a BUILD_VECTOR node");
12778 SelectionDAG
&DAG
= DCI
.DAG
;
12781 if (!Subtarget
.hasVSX())
12784 // The target independent DAG combiner will leave a build_vector of
12785 // float-to-int conversions intact. We can generate MUCH better code for
12786 // a float-to-int conversion of a vector of floats.
12787 SDValue FirstInput
= N
->getOperand(0);
12788 if (FirstInput
.getOpcode() == PPCISD::MFVSR
) {
12789 SDValue Reduced
= combineElementTruncationToVectorTruncation(N
, DCI
);
12794 // If we're building a vector out of consecutive loads, just load that
12796 SDValue Reduced
= combineBVOfConsecutiveLoads(N
, DAG
);
12800 // If we're building a vector out of extended elements from another vector
12801 // we have P9 vector integer extend instructions. The code assumes legal
12802 // input types (i.e. it can't handle things like v4i16) so do not run before
12804 if (Subtarget
.hasP9Altivec() && !DCI
.isBeforeLegalize()) {
12805 Reduced
= combineBVOfVecSExt(N
, DAG
);
12811 if (N
->getValueType(0) != MVT::v2f64
)
12815 // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
12816 if (FirstInput
.getOpcode() != ISD::SINT_TO_FP
&&
12817 FirstInput
.getOpcode() != ISD::UINT_TO_FP
)
12819 if (N
->getOperand(1).getOpcode() != ISD::SINT_TO_FP
&&
12820 N
->getOperand(1).getOpcode() != ISD::UINT_TO_FP
)
12822 if (FirstInput
.getOpcode() != N
->getOperand(1).getOpcode())
12825 SDValue Ext1
= FirstInput
.getOperand(0);
12826 SDValue Ext2
= N
->getOperand(1).getOperand(0);
12827 if(Ext1
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
||
12828 Ext2
.getOpcode() != ISD::EXTRACT_VECTOR_ELT
)
12831 ConstantSDNode
*Ext1Op
= dyn_cast
<ConstantSDNode
>(Ext1
.getOperand(1));
12832 ConstantSDNode
*Ext2Op
= dyn_cast
<ConstantSDNode
>(Ext2
.getOperand(1));
12833 if (!Ext1Op
|| !Ext2Op
)
12835 if (Ext1
.getOperand(0).getValueType() != MVT::v4i32
||
12836 Ext1
.getOperand(0) != Ext2
.getOperand(0))
12839 int FirstElem
= Ext1Op
->getZExtValue();
12840 int SecondElem
= Ext2Op
->getZExtValue();
12842 if (FirstElem
== 0 && SecondElem
== 1)
12843 SubvecIdx
= Subtarget
.isLittleEndian() ? 1 : 0;
12844 else if (FirstElem
== 2 && SecondElem
== 3)
12845 SubvecIdx
= Subtarget
.isLittleEndian() ? 0 : 1;
12849 SDValue SrcVec
= Ext1
.getOperand(0);
12850 auto NodeType
= (N
->getOperand(1).getOpcode() == ISD::SINT_TO_FP
) ?
12851 PPCISD::SINT_VEC_TO_FP
: PPCISD::UINT_VEC_TO_FP
;
12852 return DAG
.getNode(NodeType
, dl
, MVT::v2f64
,
12853 SrcVec
, DAG
.getIntPtrConstant(SubvecIdx
, dl
));
12856 SDValue
PPCTargetLowering::combineFPToIntToFP(SDNode
*N
,
12857 DAGCombinerInfo
&DCI
) const {
12858 assert((N
->getOpcode() == ISD::SINT_TO_FP
||
12859 N
->getOpcode() == ISD::UINT_TO_FP
) &&
12860 "Need an int -> FP conversion node here");
12862 if (useSoftFloat() || !Subtarget
.has64BitSupport())
12865 SelectionDAG
&DAG
= DCI
.DAG
;
12869 // Don't handle ppc_fp128 here or conversions that are out-of-range capable
12870 // from the hardware.
12871 if (Op
.getValueType() != MVT::f32
&& Op
.getValueType() != MVT::f64
)
12873 if (Op
.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1
) ||
12874 Op
.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64
))
12877 SDValue
FirstOperand(Op
.getOperand(0));
12878 bool SubWordLoad
= FirstOperand
.getOpcode() == ISD::LOAD
&&
12879 (FirstOperand
.getValueType() == MVT::i8
||
12880 FirstOperand
.getValueType() == MVT::i16
);
12881 if (Subtarget
.hasP9Vector() && Subtarget
.hasP9Altivec() && SubWordLoad
) {
12882 bool Signed
= N
->getOpcode() == ISD::SINT_TO_FP
;
12883 bool DstDouble
= Op
.getValueType() == MVT::f64
;
12884 unsigned ConvOp
= Signed
?
12885 (DstDouble
? PPCISD::FCFID
: PPCISD::FCFIDS
) :
12886 (DstDouble
? PPCISD::FCFIDU
: PPCISD::FCFIDUS
);
12887 SDValue WidthConst
=
12888 DAG
.getIntPtrConstant(FirstOperand
.getValueType() == MVT::i8
? 1 : 2,
12890 LoadSDNode
*LDN
= cast
<LoadSDNode
>(FirstOperand
.getNode());
12891 SDValue Ops
[] = { LDN
->getChain(), LDN
->getBasePtr(), WidthConst
};
12892 SDValue Ld
= DAG
.getMemIntrinsicNode(PPCISD::LXSIZX
, dl
,
12893 DAG
.getVTList(MVT::f64
, MVT::Other
),
12894 Ops
, MVT::i8
, LDN
->getMemOperand());
12896 // For signed conversion, we need to sign-extend the value in the VSR
12898 SDValue ExtOps
[] = { Ld
, WidthConst
};
12899 SDValue Ext
= DAG
.getNode(PPCISD::VEXTS
, dl
, MVT::f64
, ExtOps
);
12900 return DAG
.getNode(ConvOp
, dl
, DstDouble
? MVT::f64
: MVT::f32
, Ext
);
12902 return DAG
.getNode(ConvOp
, dl
, DstDouble
? MVT::f64
: MVT::f32
, Ld
);
12906 // For i32 intermediate values, unfortunately, the conversion functions
12907 // leave the upper 32 bits of the value are undefined. Within the set of
12908 // scalar instructions, we have no method for zero- or sign-extending the
12909 // value. Thus, we cannot handle i32 intermediate values here.
12910 if (Op
.getOperand(0).getValueType() == MVT::i32
)
12913 assert((Op
.getOpcode() == ISD::SINT_TO_FP
|| Subtarget
.hasFPCVT()) &&
12914 "UINT_TO_FP is supported only with FPCVT");
12916 // If we have FCFIDS, then use it when converting to single-precision.
12917 // Otherwise, convert to double-precision and then round.
12918 unsigned FCFOp
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
12919 ? (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDUS
12921 : (Op
.getOpcode() == ISD::UINT_TO_FP
? PPCISD::FCFIDU
12923 MVT FCFTy
= (Subtarget
.hasFPCVT() && Op
.getValueType() == MVT::f32
)
12927 // If we're converting from a float, to an int, and back to a float again,
12928 // then we don't need the store/load pair at all.
12929 if ((Op
.getOperand(0).getOpcode() == ISD::FP_TO_UINT
&&
12930 Subtarget
.hasFPCVT()) ||
12931 (Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
)) {
12932 SDValue Src
= Op
.getOperand(0).getOperand(0);
12933 if (Src
.getValueType() == MVT::f32
) {
12934 Src
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Src
);
12935 DCI
.AddToWorklist(Src
.getNode());
12936 } else if (Src
.getValueType() != MVT::f64
) {
12937 // Make sure that we don't pick up a ppc_fp128 source value.
12942 Op
.getOperand(0).getOpcode() == ISD::FP_TO_SINT
? PPCISD::FCTIDZ
:
12945 SDValue Tmp
= DAG
.getNode(FCTOp
, dl
, MVT::f64
, Src
);
12946 SDValue FP
= DAG
.getNode(FCFOp
, dl
, FCFTy
, Tmp
);
12948 if (Op
.getValueType() == MVT::f32
&& !Subtarget
.hasFPCVT()) {
12949 FP
= DAG
.getNode(ISD::FP_ROUND
, dl
,
12950 MVT::f32
, FP
, DAG
.getIntPtrConstant(0, dl
));
12951 DCI
.AddToWorklist(FP
.getNode());
12960 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
12961 // builtins) into loads with swaps.
12962 SDValue
PPCTargetLowering::expandVSXLoadForLE(SDNode
*N
,
12963 DAGCombinerInfo
&DCI
) const {
12964 SelectionDAG
&DAG
= DCI
.DAG
;
12968 MachineMemOperand
*MMO
;
12970 switch (N
->getOpcode()) {
12972 llvm_unreachable("Unexpected opcode for little endian VSX load");
12974 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
12975 Chain
= LD
->getChain();
12976 Base
= LD
->getBasePtr();
12977 MMO
= LD
->getMemOperand();
12978 // If the MMO suggests this isn't a load of a full vector, leave
12979 // things alone. For a built-in, we have to make the change for
12980 // correctness, so if there is a size problem that will be a bug.
12981 if (MMO
->getSize() < 16)
12985 case ISD::INTRINSIC_W_CHAIN
: {
12986 MemIntrinsicSDNode
*Intrin
= cast
<MemIntrinsicSDNode
>(N
);
12987 Chain
= Intrin
->getChain();
12988 // Similarly to the store case below, Intrin->getBasePtr() doesn't get
12989 // us what we want. Get operand 2 instead.
12990 Base
= Intrin
->getOperand(2);
12991 MMO
= Intrin
->getMemOperand();
12996 MVT VecTy
= N
->getValueType(0).getSimpleVT();
12998 // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
12999 // aligned and the type is a vector with elements up to 4 bytes
13000 if (Subtarget
.needsSwapsForVSXMemOps() && !(MMO
->getAlignment()%16)
13001 && VecTy
.getScalarSizeInBits() <= 32 ) {
13005 SDValue LoadOps
[] = { Chain
, Base
};
13006 SDValue Load
= DAG
.getMemIntrinsicNode(PPCISD::LXVD2X
, dl
,
13007 DAG
.getVTList(MVT::v2f64
, MVT::Other
),
13008 LoadOps
, MVT::v2f64
, MMO
);
13010 DCI
.AddToWorklist(Load
.getNode());
13011 Chain
= Load
.getValue(1);
13012 SDValue Swap
= DAG
.getNode(
13013 PPCISD::XXSWAPD
, dl
, DAG
.getVTList(MVT::v2f64
, MVT::Other
), Chain
, Load
);
13014 DCI
.AddToWorklist(Swap
.getNode());
13016 // Add a bitcast if the resulting load type doesn't match v2f64.
13017 if (VecTy
!= MVT::v2f64
) {
13018 SDValue N
= DAG
.getNode(ISD::BITCAST
, dl
, VecTy
, Swap
);
13019 DCI
.AddToWorklist(N
.getNode());
13020 // Package {bitcast value, swap's chain} to match Load's shape.
13021 return DAG
.getNode(ISD::MERGE_VALUES
, dl
, DAG
.getVTList(VecTy
, MVT::Other
),
13022 N
, Swap
.getValue(1));
// Combine helper for VSX vector stores. N is either a plain store node or a
// VSX store intrinsic (ISD::INTRINSIC_VOID). The stored vector is bitcast to
// v2f64 when it has another type, swapped with PPCISD::XXSWAPD, and written
// by a PPCISD::STXVD2X memory-intrinsic node that reuses the original
// MachineMemOperand. Every node created here is added to the combiner
// worklist.
13028 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
13029 // builtins) into stores with swaps.
13030 SDValue
PPCTargetLowering::expandVSXStoreForLE(SDNode
*N
,
13031 DAGCombinerInfo
&DCI
) const {
13032 SelectionDAG
&DAG
= DCI
.DAG
;
13037 MachineMemOperand
*MMO
;
// Pick up the chain, base pointer and memory operand; where they live
// depends on whether N is a store node or a store intrinsic.
13039 switch (N
->getOpcode()) {
13041 llvm_unreachable("Unexpected opcode for little endian VSX store");
13043 StoreSDNode
*ST
= cast
<StoreSDNode
>(N
);
13044 Chain
= ST
->getChain();
13045 Base
= ST
->getBasePtr();
13046 MMO
= ST
->getMemOperand();
13048 // If the MMO suggests this isn't a store of a full vector, leave
13049 // things alone. For a built-in, we have to make the change for
13050 // correctness, so if there is a size problem that will be a bug.
13051 if (MMO
->getSize() < 16)
13055 case ISD::INTRINSIC_VOID
: {
13056 MemIntrinsicSDNode
*Intrin
= cast
<MemIntrinsicSDNode
>(N
);
13057 Chain
= Intrin
->getChain();
13058 // Intrin->getBasePtr() oddly does not get what we want.
13059 Base
= Intrin
->getOperand(3);
13060 MMO
= Intrin
->getMemOperand();
13066 SDValue Src
= N
->getOperand(SrcOpnd
);
13067 MVT VecTy
= Src
.getValueType().getSimpleVT();
13069 // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the store is
13070 // 16-byte aligned and the type is a vector with elements up to 4 bytes.
13071 if (Subtarget
.needsSwapsForVSXMemOps() && !(MMO
->getAlignment()%16)
13072 && VecTy
.getScalarSizeInBits() <= 32 ) {
13076 // All stores are done as v2f64 and possible bit cast.
13077 if (VecTy
!= MVT::v2f64
) {
13078 Src
= DAG
.getNode(ISD::BITCAST
, dl
, MVT::v2f64
, Src
);
13079 DCI
.AddToWorklist(Src
.getNode());
// The swap produces (v2f64, chain); the store below is chained on the swap's
// chain result.
13082 SDValue Swap
= DAG
.getNode(PPCISD::XXSWAPD
, dl
,
13083 DAG
.getVTList(MVT::v2f64
, MVT::Other
), Chain
, Src
);
13084 DCI
.AddToWorklist(Swap
.getNode());
13085 Chain
= Swap
.getValue(1);
13086 SDValue StoreOps
[] = { Chain
, Swap
, Base
};
13087 SDValue Store
= DAG
.getMemIntrinsicNode(PPCISD::STXVD2X
, dl
,
13088 DAG
.getVTList(MVT::Other
),
13089 StoreOps
, VecTy
, MMO
);
13090 DCI
.AddToWorklist(Store
.getNode());
13094 // Handle DAG combine for STORE (FP_TO_INT F).
// N is a store whose stored value (operand 1) is an FP_TO_SINT/FP_TO_UINT.
// The floating-point source is extended to f64 when it is 32 bits wide,
// converted with PPCISD::FP_TO_SINT_IN_VSR or PPCISD::FP_TO_UINT_IN_VSR, and
// written by a PPCISD::ST_VSR_SCAL_INT memory-intrinsic node that reuses the
// original store's memory VT and memory operand.
13095 SDValue
PPCTargetLowering::combineStoreFPToInt(SDNode
*N
,
13096 DAGCombinerInfo
&DCI
) const {
13098 SelectionDAG
&DAG
= DCI
.DAG
;
13100 unsigned Opcode
= N
->getOperand(1).getOpcode();
13102 assert((Opcode
== ISD::FP_TO_SINT
|| Opcode
== ISD::FP_TO_UINT
)
13103 && "Not a FP_TO_INT Instruction!");
// Val is the floating-point input of the conversion, Op1VT the integer type
// it is converted to, and ResVT the floating-point type of Val.
13105 SDValue Val
= N
->getOperand(1).getOperand(0);
13106 EVT Op1VT
= N
->getOperand(1).getValueType();
13107 EVT ResVT
= Val
.getValueType();
13109 // Floating point types smaller than 32 bits are not legal on Power.
13110 if (ResVT
.getScalarSizeInBits() < 32)
13113 // Only perform combine for conversion to i64/i32 or power9 i16/i8.
13114 bool ValidTypeForStoreFltAsInt
=
13115 (Op1VT
== MVT::i32
|| Op1VT
== MVT::i64
||
13116 (Subtarget
.hasP9Vector() && (Op1VT
== MVT::i16
|| Op1VT
== MVT::i8
)));
13118 if (ResVT
== MVT::ppcf128
|| !Subtarget
.hasP8Altivec() ||
13119 cast
<StoreSDNode
>(N
)->isTruncatingStore() || !ValidTypeForStoreFltAsInt
)
13122 // Extend f32 values to f64
13123 if (ResVT
.getScalarSizeInBits() == 32) {
13124 Val
= DAG
.getNode(ISD::FP_EXTEND
, dl
, MVT::f64
, Val
);
13125 DCI
.AddToWorklist(Val
.getNode());
13128 // Set signed or unsigned conversion opcode.
13129 unsigned ConvOpcode
= (Opcode
== ISD::FP_TO_SINT
) ?
13130 PPCISD::FP_TO_SINT_IN_VSR
:
13131 PPCISD::FP_TO_UINT_IN_VSR
;
// The converted value keeps a floating-point type: f128 for an f128 source,
// f64 otherwise.
13133 Val
= DAG
.getNode(ConvOpcode
,
13134 dl
, ResVT
== MVT::f128
? MVT::f128
: MVT::f64
, Val
);
13135 DCI
.AddToWorklist(Val
.getNode());
13137 // Set number of bytes being converted.
13138 unsigned ByteSize
= Op1VT
.getScalarSizeInBits() / 8;
// Operands: chain, converted value, address (operand 2 of the store), byte
// count, and the integer value type.
13139 SDValue Ops
[] = { N
->getOperand(0), Val
, N
->getOperand(2),
13140 DAG
.getIntPtrConstant(ByteSize
, dl
, false),
13141 DAG
.getValueType(Op1VT
) };
13143 Val
= DAG
.getMemIntrinsicNode(PPCISD::ST_VSR_SCAL_INT
, dl
,
13144 DAG
.getVTList(MVT::Other
), Ops
,
13145 cast
<StoreSDNode
>(N
)->getMemoryVT(),
13146 cast
<StoreSDNode
>(N
)->getMemOperand());
13148 DCI
.AddToWorklist(Val
.getNode());
// Combine a vector shuffle (SVN) with an adjacent normal load or store
// (LSBase) into a single PPCISD::LOAD_VEC_BE or PPCISD::STORE_VEC_BE
// memory-intrinsic node. The checks below look at type legality, little-endian
// VSX, P9 vector support, and whether the shuffle mask passes the
// IsElementReverse predicate.
13152 SDValue
PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode
*SVN
,
13153 LSBaseSDNode
*LSBase
,
13154 DAGCombinerInfo
&DCI
) const {
13155 assert((ISD::isNormalLoad(LSBase
) || ISD::isNormalStore(LSBase
)) &&
13156 "Not a reverse memop pattern!");
// Predicate over the shuffle mask; it walks the mask from its last entry to
// its first.
13158 auto IsElementReverse
= [](const ShuffleVectorSDNode
*SVN
) -> bool {
13159 auto Mask
= SVN
->getMask();
13161 auto I
= Mask
.rbegin();
13162 auto E
= Mask
.rend();
13164 for (; I
!= E
; ++I
) {
13172 SelectionDAG
&DAG
= DCI
.DAG
;
13173 EVT VT
= SVN
->getValueType(0);
13175 if (!isTypeLegal(VT
) || !Subtarget
.isLittleEndian() || !Subtarget
.hasVSX())
13178 // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
13179 // See comment in PPCVSXSwapRemoval.cpp.
13180 // This combine conflicts with the PPCVSXSwapRemoval optimization, so we
// don't do it before P9.
13181 if (!Subtarget
.hasP9Vector())
13184 if(!IsElementReverse(SVN
))
// Load case: the new node takes the load's chain and base pointer and
// produces (VT, chain).
13187 if (LSBase
->getOpcode() == ISD::LOAD
) {
13189 SDValue LoadOps
[] = {LSBase
->getChain(), LSBase
->getBasePtr()};
13190 return DAG
.getMemIntrinsicNode(
13191 PPCISD::LOAD_VEC_BE
, dl
, DAG
.getVTList(VT
, MVT::Other
), LoadOps
,
13192 LSBase
->getMemoryVT(), LSBase
->getMemOperand());
// Store case: the value stored is the shuffle's first input, i.e. the
// vector before shuffling.
13195 if (LSBase
->getOpcode() == ISD::STORE
) {
13197 SDValue StoreOps
[] = {LSBase
->getChain(), SVN
->getOperand(0),
13198 LSBase
->getBasePtr()};
13199 return DAG
.getMemIntrinsicNode(
13200 PPCISD::STORE_VEC_BE
, dl
, DAG
.getVTList(MVT::Other
), StoreOps
,
13201 LSBase
->getMemoryVT(), LSBase
->getMemOperand());
13204 llvm_unreachable("Expected a load or store node here");
// PowerPC-specific DAG combines. Dispatches on N's opcode: some opcodes are
// forwarded to dedicated combine helpers (combineADD, combineSHL, ...,
// combineVSelect), the rest are folded inline below (constant shifts, stores
// of FP-to-int / byte-swapped / constant values, VSX and unaligned vector
// loads, lvsl/lvsr reuse, byte-swapped loads, VCMPo reuse, and branches on
// the loop-decrement intrinsic or on Altivec predicate compares).
13207 SDValue
PPCTargetLowering::PerformDAGCombine(SDNode
*N
,
13208 DAGCombinerInfo
&DCI
) const {
13209 SelectionDAG
&DAG
= DCI
.DAG
;
13211 switch (N
->getOpcode()) {
13214 return combineADD(N
, DCI
);
13216 return combineSHL(N
, DCI
);
13218 return combineSRA(N
, DCI
);
13220 return combineSRL(N
, DCI
);
13222 return combineMUL(N
, DCI
);
13224 if (isNullConstant(N
->getOperand(0))) // 0 << V -> 0.
13225 return N
->getOperand(0);
13228 if (isNullConstant(N
->getOperand(0))) // 0 >>u V -> 0.
13229 return N
->getOperand(0);
13232 if (ConstantSDNode
*C
= dyn_cast
<ConstantSDNode
>(N
->getOperand(0))) {
13233 if (C
->isNullValue() || // 0 >>s V -> 0.
13234 C
->isAllOnesValue()) // -1 >>s V -> -1.
13235 return N
->getOperand(0);
13238 case ISD::SIGN_EXTEND
:
13239 case ISD::ZERO_EXTEND
:
13240 case ISD::ANY_EXTEND
:
13241 return DAGCombineExtBoolTrunc(N
, DCI
);
13242 case ISD::TRUNCATE
:
13243 return combineTRUNCATE(N
, DCI
);
13245 if (SDValue CSCC
= combineSetCC(N
, DCI
))
13248 case ISD::SELECT_CC
:
13249 return DAGCombineTruncBoolExt(N
, DCI
);
13250 case ISD::SINT_TO_FP
:
13251 case ISD::UINT_TO_FP
:
13252 return combineFPToIntToFP(N
, DCI
);
13253 case ISD::VECTOR_SHUFFLE
:
// A shuffle whose first input is a normal load is handed to
// combineVReverseMemOP.
13254 if (ISD::isNormalLoad(N
->getOperand(0).getNode())) {
13255 LSBaseSDNode
* LSBase
= cast
<LSBaseSDNode
>(N
->getOperand(0));
13256 return combineVReverseMemOP(cast
<ShuffleVectorSDNode
>(N
), LSBase
, DCI
);
13261 EVT Op1VT
= N
->getOperand(1).getValueType();
13262 unsigned Opcode
= N
->getOperand(1).getOpcode();
13264 if (Opcode
== ISD::FP_TO_SINT
|| Opcode
== ISD::FP_TO_UINT
) {
13265 SDValue Val
= combineStoreFPToInt(N
, DCI
);
13270 if (Opcode
== ISD::VECTOR_SHUFFLE
&& ISD::isNormalStore(N
)) {
13271 ShuffleVectorSDNode
*SVN
= cast
<ShuffleVectorSDNode
>(N
->getOperand(1));
13272 SDValue Val
= combineVReverseMemOP(SVN
, cast
<LSBaseSDNode
>(N
), DCI
);
13277 // Turn STORE (BSWAP) -> sthbrx/stwbrx.
13278 if (cast
<StoreSDNode
>(N
)->isUnindexed() && Opcode
== ISD::BSWAP
&&
13279 N
->getOperand(1).getNode()->hasOneUse() &&
13280 (Op1VT
== MVT::i32
|| Op1VT
== MVT::i16
||
13281 (Subtarget
.hasLDBRX() && Subtarget
.isPPC64() && Op1VT
== MVT::i64
))) {
13283 // STBRX can only handle simple types and it makes no sense to store fewer
13284 // than two bytes in byte-reversed order.
13285 EVT mVT
= cast
<StoreSDNode
>(N
)->getMemoryVT();
13286 if (mVT
.isExtended() || mVT
.getSizeInBits() < 16)
13289 SDValue BSwapOp
= N
->getOperand(1).getOperand(0);
13290 // Do an any-extend to 32-bits if this is a half-word input.
13291 if (BSwapOp
.getValueType() == MVT::i16
)
13292 BSwapOp
= DAG
.getNode(ISD::ANY_EXTEND
, dl
, MVT::i32
, BSwapOp
);
13294 // If the type of the BSWAP operand is wider than the stored memory width,
13295 // it needs to be shifted to the right side before STBRX.
13296 if (Op1VT
.bitsGT(mVT
)) {
13297 int Shift
= Op1VT
.getSizeInBits() - mVT
.getSizeInBits();
13298 BSwapOp
= DAG
.getNode(ISD::SRL
, dl
, Op1VT
, BSwapOp
,
13299 DAG
.getConstant(Shift
, dl
, MVT::i32
));
13300 // Need to truncate if this is a bswap of i64 stored as i32/i16.
13301 if (Op1VT
== MVT::i64
)
13302 BSwapOp
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i32
, BSwapOp
);
13306 N
->getOperand(0), BSwapOp
, N
->getOperand(2), DAG
.getValueType(mVT
)
13309 DAG
.getMemIntrinsicNode(PPCISD::STBRX
, dl
, DAG
.getVTList(MVT::Other
),
13310 Ops
, cast
<StoreSDNode
>(N
)->getMemoryVT(),
13311 cast
<StoreSDNode
>(N
)->getMemOperand());
13314 // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
13315 // So it can increase the chance of CSE constant construction.
13316 if (Subtarget
.isPPC64() && !DCI
.isBeforeLegalize() &&
13317 isa
<ConstantSDNode
>(N
->getOperand(1)) && Op1VT
== MVT::i32
) {
13318 // Need to sign-extend to 64 bits to handle negative values.
13319 EVT MemVT
= cast
<StoreSDNode
>(N
)->getMemoryVT();
13320 uint64_t Val64
= SignExtend64(N
->getConstantOperandVal(1),
13321 MemVT
.getSizeInBits());
13322 SDValue Const64
= DAG
.getConstant(Val64
, dl
, MVT::i64
);
13324 // DAG.getTruncStore() can't be used here because it doesn't accept
13325 // the general (base + offset) addressing mode.
13326 // So we use UpdateNodeOperands and setTruncatingStore instead.
13327 DAG
.UpdateNodeOperands(N
, N
->getOperand(0), Const64
, N
->getOperand(2),
13329 cast
<StoreSDNode
>(N
)->setTruncatingStore(true);
13330 return SDValue(N
, 0);
13333 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13334 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13335 if (Op1VT
.isSimple()) {
13336 MVT StoreVT
= Op1VT
.getSimpleVT();
13337 if (Subtarget
.needsSwapsForVSXMemOps() &&
13338 (StoreVT
== MVT::v2f64
|| StoreVT
== MVT::v2i64
||
13339 StoreVT
== MVT::v4f32
|| StoreVT
== MVT::v4i32
))
13340 return expandVSXStoreForLE(N
, DCI
);
13345 LoadSDNode
*LD
= cast
<LoadSDNode
>(N
);
13346 EVT VT
= LD
->getValueType(0);
13348 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13349 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13350 if (VT
.isSimple()) {
13351 MVT LoadVT
= VT
.getSimpleVT();
13352 if (Subtarget
.needsSwapsForVSXMemOps() &&
13353 (LoadVT
== MVT::v2f64
|| LoadVT
== MVT::v2i64
||
13354 LoadVT
== MVT::v4f32
|| LoadVT
== MVT::v4i32
))
13355 return expandVSXLoadForLE(N
, DCI
);
13358 // We sometimes end up with a 64-bit integer load, from which we extract
13359 // two single-precision floating-point numbers. This happens with
13360 // std::complex<float>, and other similar structures, because of the way we
13361 // canonicalize structure copies. However, if we lack direct moves,
13362 // then the final bitcasts from the extracted integer values to the
13363 // floating-point numbers turn into store/load pairs. Even with direct moves,
13364 // just loading the two floating-point numbers is likely better.
13365 auto ReplaceTwoFloatLoad
= [&]() {
13366 if (VT
!= MVT::i64
)
13369 if (LD
->getExtensionType() != ISD::NON_EXTLOAD
||
13373 // We're looking for a sequence like this:
13374 // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
13375 // t16: i64 = srl t13, Constant:i32<32>
13376 // t17: i32 = truncate t16
13377 // t18: f32 = bitcast t17
13378 // t19: i32 = truncate t13
13379 // t20: f32 = bitcast t19
13381 if (!LD
->hasNUsesOfValue(2, 0))
// Pick the two users of the loaded value (result 0), skipping uses of the
// load's other results.
13384 auto UI
= LD
->use_begin();
13385 while (UI
.getUse().getResNo() != 0) ++UI
;
13386 SDNode
*Trunc
= *UI
++;
13387 while (UI
.getUse().getResNo() != 0) ++UI
;
13388 SDNode
*RightShift
= *UI
;
13389 if (Trunc
->getOpcode() != ISD::TRUNCATE
)
13390 std::swap(Trunc
, RightShift
);
13392 if (Trunc
->getOpcode() != ISD::TRUNCATE
||
13393 Trunc
->getValueType(0) != MVT::i32
||
13394 !Trunc
->hasOneUse())
13396 if (RightShift
->getOpcode() != ISD::SRL
||
13397 !isa
<ConstantSDNode
>(RightShift
->getOperand(1)) ||
13398 RightShift
->getConstantOperandVal(1) != 32 ||
13399 !RightShift
->hasOneUse())
13402 SDNode
*Trunc2
= *RightShift
->use_begin();
13403 if (Trunc2
->getOpcode() != ISD::TRUNCATE
||
13404 Trunc2
->getValueType(0) != MVT::i32
||
13405 !Trunc2
->hasOneUse())
13408 SDNode
*Bitcast
= *Trunc
->use_begin();
13409 SDNode
*Bitcast2
= *Trunc2
->use_begin();
13411 if (Bitcast
->getOpcode() != ISD::BITCAST
||
13412 Bitcast
->getValueType(0) != MVT::f32
)
13414 if (Bitcast2
->getOpcode() != ISD::BITCAST
||
13415 Bitcast2
->getValueType(0) != MVT::f32
)
13418 if (Subtarget
.isLittleEndian())
13419 std::swap(Bitcast
, Bitcast2
);
13421 // Bitcast has the second float (in memory-layout order) and Bitcast2
13422 // has the first one.
13424 SDValue BasePtr
= LD
->getBasePtr();
13425 if (LD
->isIndexed()) {
13426 assert(LD
->getAddressingMode() == ISD::PRE_INC
&&
13427 "Non-pre-inc AM on PPC?");
13429 DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(), BasePtr
,
// The two replacement loads use the original memory-operand flags with the
// volatile bit cleared.
13434 LD
->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile
;
13435 SDValue FloatLoad
= DAG
.getLoad(MVT::f32
, dl
, LD
->getChain(), BasePtr
,
13436 LD
->getPointerInfo(), LD
->getAlignment(),
13437 MMOFlags
, LD
->getAAInfo());
13439 DAG
.getNode(ISD::ADD
, dl
, BasePtr
.getValueType(),
13440 BasePtr
, DAG
.getIntPtrConstant(4, dl
));
// The second load is chained on the first one and reads 4 bytes further on.
13441 SDValue FloatLoad2
= DAG
.getLoad(
13442 MVT::f32
, dl
, SDValue(FloatLoad
.getNode(), 1), AddPtr
,
13443 LD
->getPointerInfo().getWithOffset(4),
13444 MinAlign(LD
->getAlignment(), 4), MMOFlags
, LD
->getAAInfo());
13446 if (LD
->isIndexed()) {
13447 // Note that DAGCombine should re-form any pre-increment load(s) from
13448 // what is produced here if that makes sense.
13449 DAG
.ReplaceAllUsesOfValueWith(SDValue(LD
, 1), BasePtr
);
13452 DCI
.CombineTo(Bitcast2
, FloatLoad
);
13453 DCI
.CombineTo(Bitcast
, FloatLoad2
);
// Redirect users of the original load's chain (result 2 when indexed, else
// result 1) to the chain of the second float load.
13455 DAG
.ReplaceAllUsesOfValueWith(SDValue(LD
, LD
->isIndexed() ? 2 : 1),
13456 SDValue(FloatLoad2
.getNode(), 1));
13460 if (ReplaceTwoFloatLoad())
13461 return SDValue(N
, 0);
13463 EVT MemVT
= LD
->getMemoryVT();
13464 Type
*Ty
= MemVT
.getTypeForEVT(*DAG
.getContext());
13465 unsigned ABIAlignment
= DAG
.getDataLayout().getABITypeAlignment(Ty
);
13466 Type
*STy
= MemVT
.getScalarType().getTypeForEVT(*DAG
.getContext());
13467 unsigned ScalarABIAlignment
= DAG
.getDataLayout().getABITypeAlignment(STy
);
13468 if (LD
->isUnindexed() && VT
.isVector() &&
13469 ((Subtarget
.hasAltivec() && ISD::isNON_EXTLoad(N
) &&
13470 // P8 and later hardware should just use LOAD.
13471 !Subtarget
.hasP8Vector() && (VT
== MVT::v16i8
|| VT
== MVT::v8i16
||
13472 VT
== MVT::v4i32
|| VT
== MVT::v4f32
)) ||
13473 (Subtarget
.hasQPX() && (VT
== MVT::v4f64
|| VT
== MVT::v4f32
) &&
13474 LD
->getAlignment() >= ScalarABIAlignment
)) &&
13475 LD
->getAlignment() < ABIAlignment
) {
13476 // This is a type-legal unaligned Altivec or QPX load.
13477 SDValue Chain
= LD
->getChain();
13478 SDValue Ptr
= LD
->getBasePtr();
13479 bool isLittleEndian
= Subtarget
.isLittleEndian();
13481 // This implements the loading of unaligned vectors as described in
13482 // the venerable Apple Velocity Engine overview. Specifically:
13483 // https://developer.apple.com/hardwaredrivers/ve/alignment.html
13484 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
13486 // The general idea is to expand a sequence of one or more unaligned
13487 // loads into an alignment-based permutation-control instruction (lvsl
13488 // or lvsr), a series of regular vector loads (which always truncate
13489 // their input address to an aligned address), and a series of
13490 // permutations. The results of these permutations are the requested
13491 // loaded values. The trick is that the last "extra" load is not taken
13492 // from the address you might suspect (sizeof(vector) bytes after the
13493 // last requested load), but rather sizeof(vector) - 1 bytes after the
13494 // last requested vector. The point of this is to avoid a page fault if
13495 // the base address happened to be aligned. This works because if the
13496 // base address is aligned, then adding less than a full vector length
13497 // will cause the last vector in the sequence to be (re)loaded.
13498 // Otherwise, the next vector will be fetched as you might suspect was
// necessary.
13501 // We might be able to reuse the permutation generation from
13502 // a different base address offset from this one by an aligned amount.
13503 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
13504 // optimization later.
13505 Intrinsic::ID Intr
, IntrLD
, IntrPerm
;
13506 MVT PermCntlTy
, PermTy
, LDTy
;
13507 if (Subtarget
.hasAltivec()) {
13508 Intr
= isLittleEndian
? Intrinsic::ppc_altivec_lvsr
:
13509 Intrinsic::ppc_altivec_lvsl
;
13510 IntrLD
= Intrinsic::ppc_altivec_lvx
;
13511 IntrPerm
= Intrinsic::ppc_altivec_vperm
;
13512 PermCntlTy
= MVT::v16i8
;
13513 PermTy
= MVT::v4i32
;
13516 Intr
= MemVT
== MVT::v4f64
? Intrinsic::ppc_qpx_qvlpcld
:
13517 Intrinsic::ppc_qpx_qvlpcls
;
13518 IntrLD
= MemVT
== MVT::v4f64
? Intrinsic::ppc_qpx_qvlfd
:
13519 Intrinsic::ppc_qpx_qvlfs
;
13520 IntrPerm
= Intrinsic::ppc_qpx_qvfperm
;
13521 PermCntlTy
= MVT::v4f64
;
13522 PermTy
= MVT::v4f64
;
13523 LDTy
= MemVT
.getSimpleVT();
13526 SDValue PermCntl
= BuildIntrinsicOp(Intr
, Ptr
, DAG
, dl
, PermCntlTy
);
13528 // Create the new MMO for the new base load. It is like the original MMO,
13529 // but represents an area in memory almost twice the vector size centered
13530 // on the original address. If the address is unaligned, we might start
13531 // reading up to (sizeof(vector)-1) bytes below the address of the
13532 // original unaligned load.
13533 MachineFunction
&MF
= DAG
.getMachineFunction();
13534 MachineMemOperand
*BaseMMO
=
13535 MF
.getMachineMemOperand(LD
->getMemOperand(),
13536 -(long)MemVT
.getStoreSize()+1,
13537 2*MemVT
.getStoreSize()-1);
13539 // Create the new base load.
13541 DAG
.getTargetConstant(IntrLD
, dl
, getPointerTy(MF
.getDataLayout()));
13542 SDValue BaseLoadOps
[] = { Chain
, LDXIntID
, Ptr
};
13544 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, dl
,
13545 DAG
.getVTList(PermTy
, MVT::Other
),
13546 BaseLoadOps
, LDTy
, BaseMMO
);
13548 // Note that the value of IncOffset (which is provided to the next
13549 // load's pointer info offset value, and thus used to calculate the
13550 // alignment), and the value of IncValue (which is actually used to
13551 // increment the pointer value) are different! This is because we
13552 // require the next load to appear to be aligned, even though it
13553 // is actually offset from the base pointer by a lesser amount.
13554 int IncOffset
= VT
.getSizeInBits() / 8;
13555 int IncValue
= IncOffset
;
13557 // Walk (both up and down) the chain looking for another load at the real
13558 // (aligned) offset (the alignment of the other load does not matter in
13559 // this case). If found, then do not use the offset reduction trick, as
13560 // that will prevent the loads from being later combined (as they would
13561 // otherwise be duplicates).
13562 if (!findConsecutiveLoad(LD
, DAG
))
13565 SDValue Increment
=
13566 DAG
.getConstant(IncValue
, dl
, getPointerTy(MF
.getDataLayout()));
13567 Ptr
= DAG
.getNode(ISD::ADD
, dl
, Ptr
.getValueType(), Ptr
, Increment
);
13569 MachineMemOperand
*ExtraMMO
=
13570 MF
.getMachineMemOperand(LD
->getMemOperand(),
13571 1, 2*MemVT
.getStoreSize()-1);
13572 SDValue ExtraLoadOps
[] = { Chain
, LDXIntID
, Ptr
};
13573 SDValue ExtraLoad
=
13574 DAG
.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN
, dl
,
13575 DAG
.getVTList(PermTy
, MVT::Other
),
13576 ExtraLoadOps
, LDTy
, ExtraMMO
);
13578 SDValue TF
= DAG
.getNode(ISD::TokenFactor
, dl
, MVT::Other
,
13579 BaseLoad
.getValue(1), ExtraLoad
.getValue(1));
13581 // Because vperm has a big-endian bias, we must reverse the order
13582 // of the input vectors and complement the permute control vector
13583 // when generating little endian code. We have already handled the
13584 // latter by using lvsr instead of lvsl, so just reverse BaseLoad
13585 // and ExtraLoad here.
13587 if (isLittleEndian
)
13588 Perm
= BuildIntrinsicOp(IntrPerm
,
13589 ExtraLoad
, BaseLoad
, PermCntl
, DAG
, dl
);
13591 Perm
= BuildIntrinsicOp(IntrPerm
,
13592 BaseLoad
, ExtraLoad
, PermCntl
, DAG
, dl
);
13595 Perm
= Subtarget
.hasAltivec() ?
13596 DAG
.getNode(ISD::BITCAST
, dl
, VT
, Perm
) :
13597 DAG
.getNode(ISD::FP_ROUND
, dl
, VT
, Perm
, // QPX
13598 DAG
.getTargetConstant(1, dl
, MVT::i64
));
13599 // second argument is 1 because this rounding
13600 // is always exact.
13602 // The output of the permutation is our loaded result, the TokenFactor is
// our new chain.
13604 DCI
.CombineTo(N
, Perm
, TF
);
13605 return SDValue(N
, 0);
13609 case ISD::INTRINSIC_WO_CHAIN
: {
13610 bool isLittleEndian
= Subtarget
.isLittleEndian();
13611 unsigned IID
= cast
<ConstantSDNode
>(N
->getOperand(0))->getZExtValue();
13612 Intrinsic::ID Intr
= (isLittleEndian
? Intrinsic::ppc_altivec_lvsr
13613 : Intrinsic::ppc_altivec_lvsl
);
13614 if ((IID
== Intr
||
13615 IID
== Intrinsic::ppc_qpx_qvlpcld
||
13616 IID
== Intrinsic::ppc_qpx_qvlpcls
) &&
13617 N
->getOperand(1)->getOpcode() == ISD::ADD
) {
13618 SDValue Add
= N
->getOperand(1);
13620 int Bits
= IID
== Intrinsic::ppc_qpx_qvlpcld
?
13621 5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
// If the low `Bits` bits of the added offset are known zero, look among the
// other users of the base pointer for the same intrinsic.
13623 if (DAG
.MaskedValueIsZero(Add
->getOperand(1),
13624 APInt::getAllOnesValue(Bits
/* alignment */)
13625 .zext(Add
.getScalarValueSizeInBits()))) {
13626 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13627 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13628 UE
= BasePtr
->use_end();
13630 if (UI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13631 cast
<ConstantSDNode
>(UI
->getOperand(0))->getZExtValue() == IID
) {
13632 // We've found another LVSL/LVSR, and this address is an aligned
13633 // multiple of that one. The results will be the same, so use the
13634 // one we've just found instead.
13636 return SDValue(*UI
, 0);
// Constant offset: look for another (base + constant) whose offset differs
// from ours by a multiple of 2^Bits and that already feeds the same
// intrinsic.
13641 if (isa
<ConstantSDNode
>(Add
->getOperand(1))) {
13642 SDNode
*BasePtr
= Add
->getOperand(0).getNode();
13643 for (SDNode::use_iterator UI
= BasePtr
->use_begin(),
13644 UE
= BasePtr
->use_end(); UI
!= UE
; ++UI
) {
13645 if (UI
->getOpcode() == ISD::ADD
&&
13646 isa
<ConstantSDNode
>(UI
->getOperand(1)) &&
13647 (cast
<ConstantSDNode
>(Add
->getOperand(1))->getZExtValue() -
13648 cast
<ConstantSDNode
>(UI
->getOperand(1))->getZExtValue()) %
13649 (1ULL << Bits
) == 0) {
13650 SDNode
*OtherAdd
= *UI
;
13651 for (SDNode::use_iterator VI
= OtherAdd
->use_begin(),
13652 VE
= OtherAdd
->use_end(); VI
!= VE
; ++VI
) {
13653 if (VI
->getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13654 cast
<ConstantSDNode
>(VI
->getOperand(0))->getZExtValue() == IID
) {
13655 return SDValue(*VI
, 0);
13663 // Combine vmaxsw/h/b(a, a's negation) to abs(a)
13664 // Expose the vabsduw/h/b opportunity for down stream
13665 if (!DCI
.isAfterLegalizeDAG() && Subtarget
.hasP9Altivec() &&
13666 (IID
== Intrinsic::ppc_altivec_vmaxsw
||
13667 IID
== Intrinsic::ppc_altivec_vmaxsh
||
13668 IID
== Intrinsic::ppc_altivec_vmaxsb
)) {
13669 SDValue V1
= N
->getOperand(1);
13670 SDValue V2
= N
->getOperand(2);
13671 if ((V1
.getSimpleValueType() == MVT::v4i32
||
13672 V1
.getSimpleValueType() == MVT::v8i16
||
13673 V1
.getSimpleValueType() == MVT::v16i8
) &&
13674 V1
.getSimpleValueType() == V2
.getSimpleValueType()) {
13676 if (V1
.getOpcode() == ISD::SUB
&&
13677 ISD::isBuildVectorAllZeros(V1
.getOperand(0).getNode()) &&
13678 V1
.getOperand(1) == V2
) {
13679 return DAG
.getNode(ISD::ABS
, dl
, V2
.getValueType(), V2
);
13682 if (V2
.getOpcode() == ISD::SUB
&&
13683 ISD::isBuildVectorAllZeros(V2
.getOperand(0).getNode()) &&
13684 V2
.getOperand(1) == V1
) {
13685 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
13688 if (V1
.getOpcode() == ISD::SUB
&& V2
.getOpcode() == ISD::SUB
&&
13689 V1
.getOperand(0) == V2
.getOperand(1) &&
13690 V1
.getOperand(1) == V2
.getOperand(0)) {
13691 return DAG
.getNode(ISD::ABS
, dl
, V1
.getValueType(), V1
);
13698 case ISD::INTRINSIC_W_CHAIN
:
13699 // For little endian, VSX loads require generating lxvd2x/xxswapd.
13700 // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
13701 if (Subtarget
.needsSwapsForVSXMemOps()) {
13702 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13705 case Intrinsic::ppc_vsx_lxvw4x
:
13706 case Intrinsic::ppc_vsx_lxvd2x
:
13707 return expandVSXLoadForLE(N
, DCI
);
13711 case ISD::INTRINSIC_VOID
:
13712 // For little endian, VSX stores require generating xxswapd/stxvd2x.
13713 // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
13714 if (Subtarget
.needsSwapsForVSXMemOps()) {
13715 switch (cast
<ConstantSDNode
>(N
->getOperand(1))->getZExtValue()) {
13718 case Intrinsic::ppc_vsx_stxvw4x
:
13719 case Intrinsic::ppc_vsx_stxvd2x
:
13720 return expandVSXStoreForLE(N
, DCI
);
13725 // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
13726 if (ISD::isNON_EXTLoad(N
->getOperand(0).getNode()) &&
13727 N
->getOperand(0).hasOneUse() &&
13728 (N
->getValueType(0) == MVT::i32
|| N
->getValueType(0) == MVT::i16
||
13729 (Subtarget
.hasLDBRX() && Subtarget
.isPPC64() &&
13730 N
->getValueType(0) == MVT::i64
))) {
13731 SDValue Load
= N
->getOperand(0);
13732 LoadSDNode
*LD
= cast
<LoadSDNode
>(Load
);
13733 // Create the byte-swapping load.
13735 LD
->getChain(), // Chain
13736 LD
->getBasePtr(), // Ptr
13737 DAG
.getValueType(N
->getValueType(0)) // VT
// The LBRX node produces i64 for an i64 bswap and i32 otherwise (including
// the i16 case, which is truncated below).
13740 DAG
.getMemIntrinsicNode(PPCISD::LBRX
, dl
,
13741 DAG
.getVTList(N
->getValueType(0) == MVT::i64
?
13742 MVT::i64
: MVT::i32
, MVT::Other
),
13743 Ops
, LD
->getMemoryVT(), LD
->getMemOperand());
13745 // If this is an i16 load, insert the truncate.
13746 SDValue ResVal
= BSLoad
;
13747 if (N
->getValueType(0) == MVT::i16
)
13748 ResVal
= DAG
.getNode(ISD::TRUNCATE
, dl
, MVT::i16
, BSLoad
);
13750 // First, combine the bswap away. This makes the value produced by the
// load dead.
13752 DCI
.CombineTo(N
, ResVal
);
13754 // Next, combine the load away, we give it a bogus result value but a real
13755 // chain result. The result value is dead because the bswap is dead.
13756 DCI
.CombineTo(Load
.getNode(), ResVal
, BSLoad
.getValue(1));
13758 // Return N so it doesn't get rechecked!
13759 return SDValue(N
, 0);
13763 // If a VCMPo node already exists with exactly the same operands as this
13764 // node, use its result instead of this node (VCMPo computes both a CR6 and
13765 // a normal output).
13767 if (!N
->getOperand(0).hasOneUse() &&
13768 !N
->getOperand(1).hasOneUse() &&
13769 !N
->getOperand(2).hasOneUse()) {
13771 // Scan all of the users of the LHS, looking for VCMPo's that match.
13772 SDNode
*VCMPoNode
= nullptr;
13774 SDNode
*LHSN
= N
->getOperand(0).getNode();
13775 for (SDNode::use_iterator UI
= LHSN
->use_begin(), E
= LHSN
->use_end();
13777 if (UI
->getOpcode() == PPCISD::VCMPo
&&
13778 UI
->getOperand(1) == N
->getOperand(1) &&
13779 UI
->getOperand(2) == N
->getOperand(2) &&
13780 UI
->getOperand(0) == N
->getOperand(0)) {
13785 // If there is no VCMPo node, or if its flag result has no uses, don't
// transform this.
13787 if (!VCMPoNode
|| VCMPoNode
->hasNUsesOfValue(0, 1))
13790 // Look at the (necessarily single) use of the flag value. If it has a
13791 // chain, this transformation is more complex. Note that multiple things
13792 // could use the value result, which we should ignore.
13793 SDNode
*FlagUser
= nullptr;
13794 for (SDNode::use_iterator UI
= VCMPoNode
->use_begin();
13795 FlagUser
== nullptr; ++UI
) {
13796 assert(UI
!= VCMPoNode
->use_end() && "Didn't find user!");
13797 SDNode
*User
= *UI
;
13798 for (unsigned i
= 0, e
= User
->getNumOperands(); i
!= e
; ++i
) {
13799 if (User
->getOperand(i
) == SDValue(VCMPoNode
, 1)) {
13806 // If the user is a MFOCRF instruction, we know this is safe.
13807 // Otherwise we give up for right now.
13808 if (FlagUser
->getOpcode() == PPCISD::MFOCRF
)
13809 return SDValue(VCMPoNode
, 0);
13812 case ISD::BRCOND
: {
13813 SDValue Cond
= N
->getOperand(1);
13814 SDValue Target
= N
->getOperand(2);
13816 if (Cond
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13817 cast
<ConstantSDNode
>(Cond
.getOperand(1))->getZExtValue() ==
13818 Intrinsic::loop_decrement
) {
13820 // We now need to make the intrinsic dead (it cannot be instruction
// selected).
13822 DAG
.ReplaceAllUsesOfValueWith(Cond
.getValue(1), Cond
.getOperand(0));
13823 assert(Cond
.getNode()->hasOneUse() &&
13824 "Counter decrement has more than one use");
13826 return DAG
.getNode(PPCISD::BDNZ
, dl
, MVT::Other
,
13827 N
->getOperand(0), Target
);
13832 // If this is a branch on an altivec predicate comparison, lower this so
13833 // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
13834 // lowering is done pre-legalize, because the legalizer lowers the predicate
13835 // compare down to code that is difficult to reassemble.
13836 ISD::CondCode CC
= cast
<CondCodeSDNode
>(N
->getOperand(1))->get();
13837 SDValue LHS
= N
->getOperand(2), RHS
= N
->getOperand(3);
13839 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
13840 // value. If so, pass-through the AND to get to the intrinsic.
13841 if (LHS
.getOpcode() == ISD::AND
&&
13842 LHS
.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13843 cast
<ConstantSDNode
>(LHS
.getOperand(0).getOperand(1))->getZExtValue() ==
13844 Intrinsic::loop_decrement
&&
13845 isa
<ConstantSDNode
>(LHS
.getOperand(1)) &&
13846 !isNullConstant(LHS
.getOperand(1)))
13847 LHS
= LHS
.getOperand(0);
13849 if (LHS
.getOpcode() == ISD::INTRINSIC_W_CHAIN
&&
13850 cast
<ConstantSDNode
>(LHS
.getOperand(1))->getZExtValue() ==
13851 Intrinsic::loop_decrement
&&
13852 isa
<ConstantSDNode
>(RHS
)) {
13853 assert((CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) &&
13854 "Counter decrement comparison is not EQ or NE");
13856 unsigned Val
= cast
<ConstantSDNode
>(RHS
)->getZExtValue();
// "== nonzero constant" and "!= 0" select BDNZ; the two remaining
// combinations select BDZ.
13857 bool isBDNZ
= (CC
== ISD::SETEQ
&& Val
) ||
13858 (CC
== ISD::SETNE
&& !Val
);
13860 // We now need to make the intrinsic dead (it cannot be instruction
// selected).
13862 DAG
.ReplaceAllUsesOfValueWith(LHS
.getValue(1), LHS
.getOperand(0));
13863 assert(LHS
.getNode()->hasOneUse() &&
13864 "Counter decrement has more than one use");
13866 return DAG
.getNode(isBDNZ
? PPCISD::BDNZ
: PPCISD::BDZ
, dl
, MVT::Other
,
13867 N
->getOperand(0), N
->getOperand(4));
13873 if (LHS
.getOpcode() == ISD::INTRINSIC_WO_CHAIN
&&
13874 isa
<ConstantSDNode
>(RHS
) && (CC
== ISD::SETEQ
|| CC
== ISD::SETNE
) &&
13875 getVectorCompareInfo(LHS
, CompareOpc
, isDot
, Subtarget
)) {
13876 assert(isDot
&& "Can't compare against a vector result!");
13878 // If this is a comparison against something other than 0/1, then we know
13879 // that the condition is never/always true.
13880 unsigned Val
= cast
<ConstantSDNode
>(RHS
)->getZExtValue();
13881 if (Val
!= 0 && Val
!= 1) {
13882 if (CC
== ISD::SETEQ
) // Cond never true, remove branch.
13883 return N
->getOperand(0);
13884 // Always !=, turn it into an unconditional branch.
13885 return DAG
.getNode(ISD::BR
, dl
, MVT::Other
,
13886 N
->getOperand(0), N
->getOperand(4));
// Val is 0 or 1 here. True for "== 1" and "!= 0", i.e. when the branch is
// taken on a true predicate.
13889 bool BranchOnWhenPredTrue
= (CC
== ISD::SETEQ
) ^ (Val
== 0);
13891 // Create the PPCISD altivec 'dot' comparison node.
13893 LHS
.getOperand(2), // LHS of compare
13894 LHS
.getOperand(3), // RHS of compare
13895 DAG
.getConstant(CompareOpc
, dl
, MVT::i32
)
13897 EVT VTs
[] = { LHS
.getOperand(2).getValueType(), MVT::Glue
};
13898 SDValue CompNode
= DAG
.getNode(PPCISD::VCMPo
, dl
, VTs
, Ops
);
13900 // Unpack the result based on how the target uses it.
13901 PPC::Predicate CompOpc
;
13902 switch (cast
<ConstantSDNode
>(LHS
.getOperand(1))->getZExtValue()) {
13903 default: // Can't happen, don't crash on invalid number though.
13904 case 0: // Branch on the value of the EQ bit of CR6.
13905 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_EQ
: PPC::PRED_NE
;
13907 case 1: // Branch on the inverted value of the EQ bit of CR6.
13908 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_NE
: PPC::PRED_EQ
;
13910 case 2: // Branch on the value of the LT bit of CR6.
13911 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_LT
: PPC::PRED_GE
;
13913 case 3: // Branch on the inverted value of the LT bit of CR6.
13914 CompOpc
= BranchOnWhenPredTrue
? PPC::PRED_GE
: PPC::PRED_LT
;
// The conditional branch tests CR6 and is glued to the compare through
// CompNode's second result.
13918 return DAG
.getNode(PPCISD::COND_BRANCH
, dl
, MVT::Other
, N
->getOperand(0),
13919 DAG
.getConstant(CompOpc
, dl
, MVT::i32
),
13920 DAG
.getRegister(PPC::CR6
, MVT::i32
),
13921 N
->getOperand(4), CompNode
.getValue(1));
13925 case ISD::BUILD_VECTOR
:
13926 return DAGCombineBuildVector(N
, DCI
);
13928 return combineABS(N
, DCI
);
13930 return combineVSelect(N
, DCI
);
// Build the PowerPC expansion of a signed division of N's first operand by
// Divisor, where Divisor or its negation is a power of two. The expansion is
// a PPCISD::SRA_ADDZE node whose shift amount is log2 of the divisor's
// magnitude. Only i32 and i64 results are examined, and i64 is examined
// against isPPC64(). Each node built here is appended to Created.
13937 PPCTargetLowering::BuildSDIVPow2(SDNode
*N
, const APInt
&Divisor
,
13939 SmallVectorImpl
<SDNode
*> &Created
) const {
13940 // fold (sdiv X, pow2)
13941 EVT VT
= N
->getValueType(0);
13942 if (VT
== MVT::i64
&& !Subtarget
.isPPC64())
13944 if ((VT
!= MVT::i32
&& VT
!= MVT::i64
) ||
13945 !(Divisor
.isPowerOf2() || (-Divisor
).isPowerOf2()))
13949 SDValue N0
= N
->getOperand(0);
// Lg2 is the trailing-zero count of whichever of Divisor / -Divisor is the
// power of two.
13951 bool IsNegPow2
= (-Divisor
).isPowerOf2();
13952 unsigned Lg2
= (IsNegPow2
? -Divisor
: Divisor
).countTrailingZeros();
13953 SDValue ShiftAmt
= DAG
.getConstant(Lg2
, DL
, VT
);
13955 SDValue Op
= DAG
.getNode(PPCISD::SRA_ADDZE
, DL
, VT
, N0
, ShiftAmt
);
13956 Created
.push_back(Op
.getNode());
// Op = 0 - Op. NOTE(review): the condition guarding this negation is not
// visible here; presumably it applies only when IsNegPow2 -- confirm.
13959 Op
= DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, DL
, VT
), Op
);
13960 Created
.push_back(Op
.getNode());
13966 //===----------------------------------------------------------------------===//
13967 // Inline Assembly Support
13968 //===----------------------------------------------------------------------===//
13970 void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op
,
13972 const APInt
&DemandedElts
,
13973 const SelectionDAG
&DAG
,
13974 unsigned Depth
) const {
13976 switch (Op
.getOpcode()) {
13978 case PPCISD::LBRX
: {
13979 // lhbrx is known to have the top bits cleared out.
13980 if (cast
<VTSDNode
>(Op
.getOperand(2))->getVT() == MVT::i16
)
13981 Known
.Zero
= 0xFFFF0000;
13984 case ISD::INTRINSIC_WO_CHAIN
: {
13985 switch (cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue()) {
13987 case Intrinsic::ppc_altivec_vcmpbfp_p
:
13988 case Intrinsic::ppc_altivec_vcmpeqfp_p
:
13989 case Intrinsic::ppc_altivec_vcmpequb_p
:
13990 case Intrinsic::ppc_altivec_vcmpequh_p
:
13991 case Intrinsic::ppc_altivec_vcmpequw_p
:
13992 case Intrinsic::ppc_altivec_vcmpequd_p
:
13993 case Intrinsic::ppc_altivec_vcmpgefp_p
:
13994 case Intrinsic::ppc_altivec_vcmpgtfp_p
:
13995 case Intrinsic::ppc_altivec_vcmpgtsb_p
:
13996 case Intrinsic::ppc_altivec_vcmpgtsh_p
:
13997 case Intrinsic::ppc_altivec_vcmpgtsw_p
:
13998 case Intrinsic::ppc_altivec_vcmpgtsd_p
:
13999 case Intrinsic::ppc_altivec_vcmpgtub_p
:
14000 case Intrinsic::ppc_altivec_vcmpgtuh_p
:
14001 case Intrinsic::ppc_altivec_vcmpgtuw_p
:
14002 case Intrinsic::ppc_altivec_vcmpgtud_p
:
14003 Known
.Zero
= ~1U; // All bits but the low one are known to be zero.
14010 unsigned PPCTargetLowering::getPrefLoopLogAlignment(MachineLoop
*ML
) const {
14011 switch (Subtarget
.getDarwinDirective()) {
14014 case PPC::DIR_PWR4
:
14015 case PPC::DIR_PWR5
:
14016 case PPC::DIR_PWR5X
:
14017 case PPC::DIR_PWR6
:
14018 case PPC::DIR_PWR6X
:
14019 case PPC::DIR_PWR7
:
14020 case PPC::DIR_PWR8
:
14021 case PPC::DIR_PWR9
: {
14025 if (!DisableInnermostLoopAlign32
) {
14026 // If the nested loop is an innermost loop, prefer a 32-byte alignment,
14027 // so that we can decrease cache misses and branch-prediction misses.
14028 // Actual alignment of the loop will depend on the hotness check and other
14029 // logic in alignBlocks.
14030 if (ML
->getLoopDepth() > 1 && ML
->getSubLoops().empty())
14034 const PPCInstrInfo
*TII
= Subtarget
.getInstrInfo();
14036 // For small loops (between 5 and 8 instructions), align to a 32-byte
14037 // boundary so that the entire loop fits in one instruction-cache line.
14038 uint64_t LoopSize
= 0;
14039 for (auto I
= ML
->block_begin(), IE
= ML
->block_end(); I
!= IE
; ++I
)
14040 for (auto J
= (*I
)->begin(), JE
= (*I
)->end(); J
!= JE
; ++J
) {
14041 LoopSize
+= TII
->getInstSizeInBytes(*J
);
14046 if (LoopSize
> 16 && LoopSize
<= 32)
14053 return TargetLowering::getPrefLoopLogAlignment(ML
);
14056 /// getConstraintType - Given a constraint, return the type of
14057 /// constraint it is for this target.
14058 PPCTargetLowering::ConstraintType
14059 PPCTargetLowering::getConstraintType(StringRef Constraint
) const {
14060 if (Constraint
.size() == 1) {
14061 switch (Constraint
[0]) {
14069 return C_RegisterClass
;
14071 // FIXME: While Z does indicate a memory constraint, it specifically
14072 // indicates an r+r address (used in conjunction with the 'y' modifier
14073 // in the replacement string). Currently, we're forcing the base
14074 // register to be r0 in the asm printer (which is interpreted as zero)
14075 // and forming the complete address in the second register. This is
14079 } else if (Constraint
== "wc") { // individual CR bits.
14080 return C_RegisterClass
;
14081 } else if (Constraint
== "wa" || Constraint
== "wd" ||
14082 Constraint
== "wf" || Constraint
== "ws" ||
14083 Constraint
== "wi" || Constraint
== "ww") {
14084 return C_RegisterClass
; // VSX registers.
14086 return TargetLowering::getConstraintType(Constraint
);
14089 /// Examine constraint type and operand type and determine a weight value.
14090 /// This object must already have been set up with the operand type
14091 /// and the current alternative constraint selected.
14092 TargetLowering::ConstraintWeight
14093 PPCTargetLowering::getSingleConstraintMatchWeight(
14094 AsmOperandInfo
&info
, const char *constraint
) const {
14095 ConstraintWeight weight
= CW_Invalid
;
14096 Value
*CallOperandVal
= info
.CallOperandVal
;
14097 // If we don't have a value, we can't do a match,
14098 // but allow it at the lowest weight.
14099 if (!CallOperandVal
)
14101 Type
*type
= CallOperandVal
->getType();
14103 // Look at the constraint type.
14104 if (StringRef(constraint
) == "wc" && type
->isIntegerTy(1))
14105 return CW_Register
; // an individual CR bit.
14106 else if ((StringRef(constraint
) == "wa" ||
14107 StringRef(constraint
) == "wd" ||
14108 StringRef(constraint
) == "wf") &&
14109 type
->isVectorTy())
14110 return CW_Register
;
14111 else if (StringRef(constraint
) == "wi" && type
->isIntegerTy(64))
14112 return CW_Register
; // just hold 64-bit integers data.
14113 else if (StringRef(constraint
) == "ws" && type
->isDoubleTy())
14114 return CW_Register
;
14115 else if (StringRef(constraint
) == "ww" && type
->isFloatTy())
14116 return CW_Register
;
14118 switch (*constraint
) {
14120 weight
= TargetLowering::getSingleConstraintMatchWeight(info
, constraint
);
14123 if (type
->isIntegerTy())
14124 weight
= CW_Register
;
14127 if (type
->isFloatTy())
14128 weight
= CW_Register
;
14131 if (type
->isDoubleTy())
14132 weight
= CW_Register
;
14135 if (type
->isVectorTy())
14136 weight
= CW_Register
;
14139 weight
= CW_Register
;
14142 weight
= CW_Memory
;
14148 std::pair
<unsigned, const TargetRegisterClass
*>
14149 PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo
*TRI
,
14150 StringRef Constraint
,
14152 if (Constraint
.size() == 1) {
14153 // GCC RS6000 Constraint Letters
14154 switch (Constraint
[0]) {
14155 case 'b': // R1-R31
14156 if (VT
== MVT::i64
&& Subtarget
.isPPC64())
14157 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass
);
14158 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass
);
14159 case 'r': // R0-R31
14160 if (VT
== MVT::i64
&& Subtarget
.isPPC64())
14161 return std::make_pair(0U, &PPC::G8RCRegClass
);
14162 return std::make_pair(0U, &PPC::GPRCRegClass
);
14163 // 'd' and 'f' constraints are both defined to be "the floating point
14164 // registers", where one is for 32-bit and the other for 64-bit. We don't
14165 // really care overly much here so just give them all the same reg classes.
14168 if (Subtarget
.hasSPE()) {
14169 if (VT
== MVT::f32
|| VT
== MVT::i32
)
14170 return std::make_pair(0U, &PPC::SPE4RCRegClass
);
14171 if (VT
== MVT::f64
|| VT
== MVT::i64
)
14172 return std::make_pair(0U, &PPC::SPERCRegClass
);
14174 if (VT
== MVT::f32
|| VT
== MVT::i32
)
14175 return std::make_pair(0U, &PPC::F4RCRegClass
);
14176 if (VT
== MVT::f64
|| VT
== MVT::i64
)
14177 return std::make_pair(0U, &PPC::F8RCRegClass
);
14178 if (VT
== MVT::v4f64
&& Subtarget
.hasQPX())
14179 return std::make_pair(0U, &PPC::QFRCRegClass
);
14180 if (VT
== MVT::v4f32
&& Subtarget
.hasQPX())
14181 return std::make_pair(0U, &PPC::QSRCRegClass
);
14185 if (VT
== MVT::v4f64
&& Subtarget
.hasQPX())
14186 return std::make_pair(0U, &PPC::QFRCRegClass
);
14187 if (VT
== MVT::v4f32
&& Subtarget
.hasQPX())
14188 return std::make_pair(0U, &PPC::QSRCRegClass
);
14189 if (Subtarget
.hasAltivec())
14190 return std::make_pair(0U, &PPC::VRRCRegClass
);
14193 return std::make_pair(0U, &PPC::CRRCRegClass
);
14195 } else if (Constraint
== "wc" && Subtarget
.useCRBits()) {
14196 // An individual CR bit.
14197 return std::make_pair(0U, &PPC::CRBITRCRegClass
);
14198 } else if ((Constraint
== "wa" || Constraint
== "wd" ||
14199 Constraint
== "wf" || Constraint
== "wi") &&
14200 Subtarget
.hasVSX()) {
14201 return std::make_pair(0U, &PPC::VSRCRegClass
);
14202 } else if ((Constraint
== "ws" || Constraint
== "ww") && Subtarget
.hasVSX()) {
14203 if (VT
== MVT::f32
&& Subtarget
.hasP8Vector())
14204 return std::make_pair(0U, &PPC::VSSRCRegClass
);
14206 return std::make_pair(0U, &PPC::VSFRCRegClass
);
14209 std::pair
<unsigned, const TargetRegisterClass
*> R
=
14210 TargetLowering::getRegForInlineAsmConstraint(TRI
, Constraint
, VT
);
14212 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
14213 // (which we call X[0-9]+). If a 64-bit value has been requested, and a
14214 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
14216 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
14217 // the AsmName field from *RegisterInfo.td, then this would not be necessary.
14218 if (R
.first
&& VT
== MVT::i64
&& Subtarget
.isPPC64() &&
14219 PPC::GPRCRegClass
.contains(R
.first
))
14220 return std::make_pair(TRI
->getMatchingSuperReg(R
.first
,
14221 PPC::sub_32
, &PPC::G8RCRegClass
),
14222 &PPC::G8RCRegClass
);
14224 // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
14225 if (!R
.second
&& StringRef("{cc}").equals_lower(Constraint
)) {
14226 R
.first
= PPC::CR0
;
14227 R
.second
= &PPC::CRRCRegClass
;
14233 /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
14234 /// vector. If it is invalid, don't add anything to Ops.
14235 void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op
,
14236 std::string
&Constraint
,
14237 std::vector
<SDValue
>&Ops
,
14238 SelectionDAG
&DAG
) const {
14241 // Only support length 1 constraints.
14242 if (Constraint
.length() > 1) return;
14244 char Letter
= Constraint
[0];
14255 ConstantSDNode
*CST
= dyn_cast
<ConstantSDNode
>(Op
);
14256 if (!CST
) return; // Must be an immediate to match.
14258 int64_t Value
= CST
->getSExtValue();
14259 EVT TCVT
= MVT::i64
; // All constants taken to be 64 bits so that negative
14260 // numbers are printed as such.
14262 default: llvm_unreachable("Unknown constraint letter!");
14263 case 'I': // "I" is a signed 16-bit constant.
14264 if (isInt
<16>(Value
))
14265 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14267 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
14268 if (isShiftedUInt
<16, 16>(Value
))
14269 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14271 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
14272 if (isShiftedInt
<16, 16>(Value
))
14273 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14275 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
14276 if (isUInt
<16>(Value
))
14277 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14279 case 'M': // "M" is a constant that is greater than 31.
14281 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14283 case 'N': // "N" is a positive constant that is an exact power of two.
14284 if (Value
> 0 && isPowerOf2_64(Value
))
14285 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14287 case 'O': // "O" is the constant zero.
14289 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14291 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
14292 if (isInt
<16>(-Value
))
14293 Result
= DAG
.getTargetConstant(Value
, dl
, TCVT
);
14300 if (Result
.getNode()) {
14301 Ops
.push_back(Result
);
14305 // Handle standard constraint letters.
14306 TargetLowering::LowerAsmOperandForConstraint(Op
, Constraint
, Ops
, DAG
);
14309 // isLegalAddressingMode - Return true if the addressing mode represented
14310 // by AM is legal for this target, for a load/store of the specified type.
14311 bool PPCTargetLowering::isLegalAddressingMode(const DataLayout
&DL
,
14312 const AddrMode
&AM
, Type
*Ty
,
14313 unsigned AS
, Instruction
*I
) const {
14314 // PPC does not allow r+i addressing modes for vectors!
14315 if (Ty
->isVectorTy() && AM
.BaseOffs
!= 0)
14318 // PPC allows a sign-extended 16-bit immediate field.
14319 if (AM
.BaseOffs
<= -(1LL << 16) || AM
.BaseOffs
>= (1LL << 16)-1)
14322 // No global is ever allowed as a base.
14326 // PPC only supports r+r,
14327 switch (AM
.Scale
) {
14328 case 0: // "r+i" or just "i", depending on HasBaseReg.
14331 if (AM
.HasBaseReg
&& AM
.BaseOffs
) // "r+r+i" is not allowed.
14333 // Otherwise we have r+r or r+i.
14336 if (AM
.HasBaseReg
|| AM
.BaseOffs
) // 2*r+r or 2*r+i is not allowed.
14338 // Allow 2*r as r+r.
14341 // No other scales are supported.
14348 SDValue
PPCTargetLowering::LowerRETURNADDR(SDValue Op
,
14349 SelectionDAG
&DAG
) const {
14350 MachineFunction
&MF
= DAG
.getMachineFunction();
14351 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
14352 MFI
.setReturnAddressIsTaken(true);
14354 if (verifyReturnAddressArgumentIsConstant(Op
, DAG
))
14358 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
14360 // Make sure the function does not optimize away the store of the RA to
14362 PPCFunctionInfo
*FuncInfo
= MF
.getInfo
<PPCFunctionInfo
>();
14363 FuncInfo
->setLRStoreRequired();
14364 bool isPPC64
= Subtarget
.isPPC64();
14365 auto PtrVT
= getPointerTy(MF
.getDataLayout());
14368 SDValue FrameAddr
= LowerFRAMEADDR(Op
, DAG
);
14370 DAG
.getConstant(Subtarget
.getFrameLowering()->getReturnSaveOffset(), dl
,
14371 isPPC64
? MVT::i64
: MVT::i32
);
14372 return DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(),
14373 DAG
.getNode(ISD::ADD
, dl
, PtrVT
, FrameAddr
, Offset
),
14374 MachinePointerInfo());
14377 // Just load the return address off the stack.
14378 SDValue RetAddrFI
= getReturnAddrFrameIndex(DAG
);
14379 return DAG
.getLoad(PtrVT
, dl
, DAG
.getEntryNode(), RetAddrFI
,
14380 MachinePointerInfo());
14383 SDValue
PPCTargetLowering::LowerFRAMEADDR(SDValue Op
,
14384 SelectionDAG
&DAG
) const {
14386 unsigned Depth
= cast
<ConstantSDNode
>(Op
.getOperand(0))->getZExtValue();
14388 MachineFunction
&MF
= DAG
.getMachineFunction();
14389 MachineFrameInfo
&MFI
= MF
.getFrameInfo();
14390 MFI
.setFrameAddressIsTaken(true);
14392 EVT PtrVT
= getPointerTy(MF
.getDataLayout());
14393 bool isPPC64
= PtrVT
== MVT::i64
;
14395 // Naked functions never have a frame pointer, and so we use r1. For all
14396 // other functions, this decision must be delayed until during PEI.
14398 if (MF
.getFunction().hasFnAttribute(Attribute::Naked
))
14399 FrameReg
= isPPC64
? PPC::X1
: PPC::R1
;
14401 FrameReg
= isPPC64
? PPC::FP8
: PPC::FP
;
14403 SDValue FrameAddr
= DAG
.getCopyFromReg(DAG
.getEntryNode(), dl
, FrameReg
,
14406 FrameAddr
= DAG
.getLoad(Op
.getValueType(), dl
, DAG
.getEntryNode(),
14407 FrameAddr
, MachinePointerInfo());
14411 // FIXME? Maybe this could be a TableGen attribute on some registers and
14412 // this table could be generated automatically from RegInfo.
14413 unsigned PPCTargetLowering::getRegisterByName(const char* RegName
, EVT VT
,
14414 SelectionDAG
&DAG
) const {
14415 bool isPPC64
= Subtarget
.isPPC64();
14416 bool isDarwinABI
= Subtarget
.isDarwinABI();
14418 if ((isPPC64
&& VT
!= MVT::i64
&& VT
!= MVT::i32
) ||
14419 (!isPPC64
&& VT
!= MVT::i32
))
14420 report_fatal_error("Invalid register global variable type");
14422 bool is64Bit
= isPPC64
&& VT
== MVT::i64
;
14423 unsigned Reg
= StringSwitch
<unsigned>(RegName
)
14424 .Case("r1", is64Bit
? PPC::X1
: PPC::R1
)
14425 .Case("r2", (isDarwinABI
|| isPPC64
) ? 0 : PPC::R2
)
14426 .Case("r13", (!isPPC64
&& isDarwinABI
) ? 0 :
14427 (is64Bit
? PPC::X13
: PPC::R13
))
14432 report_fatal_error("Invalid register name global variable");
14435 bool PPCTargetLowering::isAccessedAsGotIndirect(SDValue GA
) const {
14436 // The 32-bit SVR4 ABI accesses everything as got-indirect.
14437 if (Subtarget
.is32BitELFABI())
14440 // AIX accesses everything indirectly through the TOC, which is similar to
14442 if (Subtarget
.isAIXABI())
14445 CodeModel::Model CModel
= getTargetMachine().getCodeModel();
14446 // If it is small or large code model, module locals are accessed
14447 // indirectly by loading their address from .toc/.got.
14448 if (CModel
== CodeModel::Small
|| CModel
== CodeModel::Large
)
14451 // JumpTable and BlockAddress are accessed as got-indirect.
14452 if (isa
<JumpTableSDNode
>(GA
) || isa
<BlockAddressSDNode
>(GA
))
14455 if (GlobalAddressSDNode
*G
= dyn_cast
<GlobalAddressSDNode
>(GA
)) {
14456 const GlobalValue
*GV
= G
->getGlobal();
14457 unsigned char GVFlags
= Subtarget
.classifyGlobalReference(GV
);
14458 // The NLP flag indicates that a global access has to use an
14459 // extra indirection.
14460 if (GVFlags
& PPCII::MO_NLP_FLAG
)
14468 PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode
*GA
) const {
14469 // The PowerPC target isn't yet aware of offsets.
14473 bool PPCTargetLowering::getTgtMemIntrinsic(IntrinsicInfo
&Info
,
14475 MachineFunction
&MF
,
14476 unsigned Intrinsic
) const {
14477 switch (Intrinsic
) {
14478 case Intrinsic::ppc_qpx_qvlfd
:
14479 case Intrinsic::ppc_qpx_qvlfs
:
14480 case Intrinsic::ppc_qpx_qvlfcd
:
14481 case Intrinsic::ppc_qpx_qvlfcs
:
14482 case Intrinsic::ppc_qpx_qvlfiwa
:
14483 case Intrinsic::ppc_qpx_qvlfiwz
:
14484 case Intrinsic::ppc_altivec_lvx
:
14485 case Intrinsic::ppc_altivec_lvxl
:
14486 case Intrinsic::ppc_altivec_lvebx
:
14487 case Intrinsic::ppc_altivec_lvehx
:
14488 case Intrinsic::ppc_altivec_lvewx
:
14489 case Intrinsic::ppc_vsx_lxvd2x
:
14490 case Intrinsic::ppc_vsx_lxvw4x
: {
14492 switch (Intrinsic
) {
14493 case Intrinsic::ppc_altivec_lvebx
:
14496 case Intrinsic::ppc_altivec_lvehx
:
14499 case Intrinsic::ppc_altivec_lvewx
:
14502 case Intrinsic::ppc_vsx_lxvd2x
:
14505 case Intrinsic::ppc_qpx_qvlfd
:
14508 case Intrinsic::ppc_qpx_qvlfs
:
14511 case Intrinsic::ppc_qpx_qvlfcd
:
14514 case Intrinsic::ppc_qpx_qvlfcs
:
14522 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
14524 Info
.ptrVal
= I
.getArgOperand(0);
14525 Info
.offset
= -VT
.getStoreSize()+1;
14526 Info
.size
= 2*VT
.getStoreSize()-1;
14527 Info
.align
= Align(1);
14528 Info
.flags
= MachineMemOperand::MOLoad
;
14531 case Intrinsic::ppc_qpx_qvlfda
:
14532 case Intrinsic::ppc_qpx_qvlfsa
:
14533 case Intrinsic::ppc_qpx_qvlfcda
:
14534 case Intrinsic::ppc_qpx_qvlfcsa
:
14535 case Intrinsic::ppc_qpx_qvlfiwaa
:
14536 case Intrinsic::ppc_qpx_qvlfiwza
: {
14538 switch (Intrinsic
) {
14539 case Intrinsic::ppc_qpx_qvlfda
:
14542 case Intrinsic::ppc_qpx_qvlfsa
:
14545 case Intrinsic::ppc_qpx_qvlfcda
:
14548 case Intrinsic::ppc_qpx_qvlfcsa
:
14556 Info
.opc
= ISD::INTRINSIC_W_CHAIN
;
14558 Info
.ptrVal
= I
.getArgOperand(0);
14560 Info
.size
= VT
.getStoreSize();
14561 Info
.align
= Align(1);
14562 Info
.flags
= MachineMemOperand::MOLoad
;
14565 case Intrinsic::ppc_qpx_qvstfd
:
14566 case Intrinsic::ppc_qpx_qvstfs
:
14567 case Intrinsic::ppc_qpx_qvstfcd
:
14568 case Intrinsic::ppc_qpx_qvstfcs
:
14569 case Intrinsic::ppc_qpx_qvstfiw
:
14570 case Intrinsic::ppc_altivec_stvx
:
14571 case Intrinsic::ppc_altivec_stvxl
:
14572 case Intrinsic::ppc_altivec_stvebx
:
14573 case Intrinsic::ppc_altivec_stvehx
:
14574 case Intrinsic::ppc_altivec_stvewx
:
14575 case Intrinsic::ppc_vsx_stxvd2x
:
14576 case Intrinsic::ppc_vsx_stxvw4x
: {
14578 switch (Intrinsic
) {
14579 case Intrinsic::ppc_altivec_stvebx
:
14582 case Intrinsic::ppc_altivec_stvehx
:
14585 case Intrinsic::ppc_altivec_stvewx
:
14588 case Intrinsic::ppc_vsx_stxvd2x
:
14591 case Intrinsic::ppc_qpx_qvstfd
:
14594 case Intrinsic::ppc_qpx_qvstfs
:
14597 case Intrinsic::ppc_qpx_qvstfcd
:
14600 case Intrinsic::ppc_qpx_qvstfcs
:
14608 Info
.opc
= ISD::INTRINSIC_VOID
;
14610 Info
.ptrVal
= I
.getArgOperand(1);
14611 Info
.offset
= -VT
.getStoreSize()+1;
14612 Info
.size
= 2*VT
.getStoreSize()-1;
14613 Info
.align
= Align(1);
14614 Info
.flags
= MachineMemOperand::MOStore
;
14617 case Intrinsic::ppc_qpx_qvstfda
:
14618 case Intrinsic::ppc_qpx_qvstfsa
:
14619 case Intrinsic::ppc_qpx_qvstfcda
:
14620 case Intrinsic::ppc_qpx_qvstfcsa
:
14621 case Intrinsic::ppc_qpx_qvstfiwa
: {
14623 switch (Intrinsic
) {
14624 case Intrinsic::ppc_qpx_qvstfda
:
14627 case Intrinsic::ppc_qpx_qvstfsa
:
14630 case Intrinsic::ppc_qpx_qvstfcda
:
14633 case Intrinsic::ppc_qpx_qvstfcsa
:
14641 Info
.opc
= ISD::INTRINSIC_VOID
;
14643 Info
.ptrVal
= I
.getArgOperand(1);
14645 Info
.size
= VT
.getStoreSize();
14646 Info
.align
= Align(1);
14647 Info
.flags
= MachineMemOperand::MOStore
;
14657 /// getOptimalMemOpType - Returns the target specific optimal type for load
14658 /// and store operations as a result of memset, memcpy, and memmove
14659 /// lowering. If DstAlign is zero, that means the destination
14660 /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
14661 /// means there isn't a need to check it against alignment requirement,
14662 /// probably because the source does not need to be loaded. If 'IsMemset' is
14663 /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
14664 /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
14665 /// source is constant so it does not need to be loaded.
14666 /// It returns EVT::Other if the type should be determined using generic
14667 /// target-independent logic.
14668 EVT
PPCTargetLowering::getOptimalMemOpType(
14669 uint64_t Size
, unsigned DstAlign
, unsigned SrcAlign
, bool IsMemset
,
14670 bool ZeroMemset
, bool MemcpyStrSrc
,
14671 const AttributeList
&FuncAttributes
) const {
14672 if (getTargetMachine().getOptLevel() != CodeGenOpt::None
) {
14673 // When expanding a memset, require at least two QPX instructions to cover
14674 // the cost of loading the value to be stored from the constant pool.
14675 if (Subtarget
.hasQPX() && Size
>= 32 && (!IsMemset
|| Size
>= 64) &&
14676 (!SrcAlign
|| SrcAlign
>= 32) && (!DstAlign
|| DstAlign
>= 32) &&
14677 !FuncAttributes
.hasFnAttribute(Attribute::NoImplicitFloat
)) {
14681 // We should use Altivec/VSX loads and stores when available. For unaligned
14682 // addresses, unaligned VSX loads are only fast starting with the P8.
14683 if (Subtarget
.hasAltivec() && Size
>= 16 &&
14684 (((!SrcAlign
|| SrcAlign
>= 16) && (!DstAlign
|| DstAlign
>= 16)) ||
14685 ((IsMemset
&& Subtarget
.hasVSX()) || Subtarget
.hasP8Vector())))
14689 if (Subtarget
.isPPC64()) {
14696 /// Returns true if it is beneficial to convert a load of a constant
14697 /// to just the constant itself.
14698 bool PPCTargetLowering::shouldConvertConstantLoadToIntImm(const APInt
&Imm
,
14700 assert(Ty
->isIntegerTy());
14702 unsigned BitSize
= Ty
->getPrimitiveSizeInBits();
14703 return !(BitSize
== 0 || BitSize
> 64);
14706 bool PPCTargetLowering::isTruncateFree(Type
*Ty1
, Type
*Ty2
) const {
14707 if (!Ty1
->isIntegerTy() || !Ty2
->isIntegerTy())
14709 unsigned NumBits1
= Ty1
->getPrimitiveSizeInBits();
14710 unsigned NumBits2
= Ty2
->getPrimitiveSizeInBits();
14711 return NumBits1
== 64 && NumBits2
== 32;
14714 bool PPCTargetLowering::isTruncateFree(EVT VT1
, EVT VT2
) const {
14715 if (!VT1
.isInteger() || !VT2
.isInteger())
14717 unsigned NumBits1
= VT1
.getSizeInBits();
14718 unsigned NumBits2
= VT2
.getSizeInBits();
14719 return NumBits1
== 64 && NumBits2
== 32;
14722 bool PPCTargetLowering::isZExtFree(SDValue Val
, EVT VT2
) const {
14723 // Generally speaking, zexts are not free, but they are free when they can be
14724 // folded with other operations.
14725 if (LoadSDNode
*LD
= dyn_cast
<LoadSDNode
>(Val
)) {
14726 EVT MemVT
= LD
->getMemoryVT();
14727 if ((MemVT
== MVT::i1
|| MemVT
== MVT::i8
|| MemVT
== MVT::i16
||
14728 (Subtarget
.isPPC64() && MemVT
== MVT::i32
)) &&
14729 (LD
->getExtensionType() == ISD::NON_EXTLOAD
||
14730 LD
->getExtensionType() == ISD::ZEXTLOAD
))
14734 // FIXME: Add other cases...
14735 // - 32-bit shifts with a zext to i64
14736 // - zext after ctlz, bswap, etc.
14737 // - zext after and by a constant mask
14739 return TargetLowering::isZExtFree(Val
, VT2
);
14742 bool PPCTargetLowering::isFPExtFree(EVT DestVT
, EVT SrcVT
) const {
14743 assert(DestVT
.isFloatingPoint() && SrcVT
.isFloatingPoint() &&
14744 "invalid fpext types");
14745 // Extending to float128 is not free.
14746 if (DestVT
== MVT::f128
)
14751 bool PPCTargetLowering::isLegalICmpImmediate(int64_t Imm
) const {
14752 return isInt
<16>(Imm
) || isUInt
<16>(Imm
);
14755 bool PPCTargetLowering::isLegalAddImmediate(int64_t Imm
) const {
14756 return isInt
<16>(Imm
) || isUInt
<16>(Imm
);
14759 bool PPCTargetLowering::allowsMisalignedMemoryAccesses(EVT VT
,
14762 MachineMemOperand::Flags
,
14763 bool *Fast
) const {
14764 if (DisablePPCUnaligned
)
14767 // PowerPC supports unaligned memory access for simple non-vector types.
14768 // Although accessing unaligned addresses is not as efficient as accessing
14769 // aligned addresses, it is generally more efficient than manual expansion,
14770 // and generally only traps for software emulation when crossing page
14773 if (!VT
.isSimple())
14776 if (VT
.getSimpleVT().isVector()) {
14777 if (Subtarget
.hasVSX()) {
14778 if (VT
!= MVT::v2f64
&& VT
!= MVT::v2i64
&&
14779 VT
!= MVT::v4f32
&& VT
!= MVT::v4i32
)
14786 if (VT
== MVT::ppcf128
)
14795 bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT
) const {
14796 VT
= VT
.getScalarType();
14798 if (!VT
.isSimple())
14801 switch (VT
.getSimpleVT().SimpleTy
) {
14806 return (EnableQuadPrecision
&& Subtarget
.hasP9Vector());
14815 PPCTargetLowering::getScratchRegisters(CallingConv::ID
) const {
14816 // LR is a callee-save register, but we must treat it as clobbered by any call
14817 // site. Hence we include LR in the scratch registers, which are in turn added
14818 // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
14819 // to CTR, which is used by any indirect call.
14820 static const MCPhysReg ScratchRegs
[] = {
14821 PPC::X12
, PPC::LR8
, PPC::CTR8
, 0
14824 return ScratchRegs
;
14827 unsigned PPCTargetLowering::getExceptionPointerRegister(
14828 const Constant
*PersonalityFn
) const {
14829 return Subtarget
.isPPC64() ? PPC::X3
: PPC::R3
;
14832 unsigned PPCTargetLowering::getExceptionSelectorRegister(
14833 const Constant
*PersonalityFn
) const {
14834 return Subtarget
.isPPC64() ? PPC::X4
: PPC::R4
;
14838 PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
14839 EVT VT
, unsigned DefinedValues
) const {
14840 if (VT
== MVT::v2i64
)
14841 return Subtarget
.hasDirectMove(); // Don't need stack ops with direct moves
14843 if (Subtarget
.hasVSX() || Subtarget
.hasQPX())
14846 return TargetLowering::shouldExpandBuildVectorWithShuffles(VT
, DefinedValues
);
14849 Sched::Preference
PPCTargetLowering::getSchedulingPreference(SDNode
*N
) const {
14850 if (DisableILPPref
|| Subtarget
.enableMachineScheduler())
14851 return TargetLowering::getSchedulingPreference(N
);
14856 // Create a fast isel object.
14858 PPCTargetLowering::createFastISel(FunctionLoweringInfo
&FuncInfo
,
14859 const TargetLibraryInfo
*LibInfo
) const {
14860 return PPC::createFastISel(FuncInfo
, LibInfo
);
14863 void PPCTargetLowering::initializeSplitCSR(MachineBasicBlock
*Entry
) const {
14864 if (Subtarget
.isDarwinABI()) return;
14865 if (!Subtarget
.isPPC64()) return;
14867 // Update IsSplitCSR in PPCFunctionInfo
14868 PPCFunctionInfo
*PFI
= Entry
->getParent()->getInfo
<PPCFunctionInfo
>();
14869 PFI
->setIsSplitCSR(true);
14872 void PPCTargetLowering::insertCopiesSplitCSR(
14873 MachineBasicBlock
*Entry
,
14874 const SmallVectorImpl
<MachineBasicBlock
*> &Exits
) const {
14875 const PPCRegisterInfo
*TRI
= Subtarget
.getRegisterInfo();
14876 const MCPhysReg
*IStart
= TRI
->getCalleeSavedRegsViaCopy(Entry
->getParent());
14880 const TargetInstrInfo
*TII
= Subtarget
.getInstrInfo();
14881 MachineRegisterInfo
*MRI
= &Entry
->getParent()->getRegInfo();
14882 MachineBasicBlock::iterator MBBI
= Entry
->begin();
14883 for (const MCPhysReg
*I
= IStart
; *I
; ++I
) {
14884 const TargetRegisterClass
*RC
= nullptr;
14885 if (PPC::G8RCRegClass
.contains(*I
))
14886 RC
= &PPC::G8RCRegClass
;
14887 else if (PPC::F8RCRegClass
.contains(*I
))
14888 RC
= &PPC::F8RCRegClass
;
14889 else if (PPC::CRRCRegClass
.contains(*I
))
14890 RC
= &PPC::CRRCRegClass
;
14891 else if (PPC::VRRCRegClass
.contains(*I
))
14892 RC
= &PPC::VRRCRegClass
;
14894 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
14896 Register NewVR
= MRI
->createVirtualRegister(RC
);
14897 // Create copy from CSR to a virtual register.
14898 // FIXME: this currently does not emit CFI pseudo-instructions, it works
14899 // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
14900 // nounwind. If we want to generalize this later, we may need to emit
14901 // CFI pseudo-instructions.
14902 assert(Entry
->getParent()->getFunction().hasFnAttribute(
14903 Attribute::NoUnwind
) &&
14904 "Function should be nounwind in insertCopiesSplitCSR!");
14905 Entry
->addLiveIn(*I
);
14906 BuildMI(*Entry
, MBBI
, DebugLoc(), TII
->get(TargetOpcode::COPY
), NewVR
)
14909 // Insert the copy-back instructions right before the terminator.
14910 for (auto *Exit
: Exits
)
14911 BuildMI(*Exit
, Exit
->getFirstTerminator(), DebugLoc(),
14912 TII
->get(TargetOpcode::COPY
), *I
)
14917 // Override to enable LOAD_STACK_GUARD lowering on Linux.
14918 bool PPCTargetLowering::useLoadStackGuardNode() const {
14919 if (!Subtarget
.isTargetLinux())
14920 return TargetLowering::useLoadStackGuardNode();
14924 // Override to disable global variable loading on Linux.
14925 void PPCTargetLowering::insertSSPDeclarations(Module
&M
) const {
14926 if (!Subtarget
.isTargetLinux())
14927 return TargetLowering::insertSSPDeclarations(M
);
14930 bool PPCTargetLowering::isFPImmLegal(const APFloat
&Imm
, EVT VT
,
14931 bool ForCodeSize
) const {
14932 if (!VT
.isSimple() || !Subtarget
.hasVSX())
14935 switch(VT
.getSimpleVT().SimpleTy
) {
14937 // For FP types that are currently not supported by PPC backend, return
14938 // false. Examples: f16, f80.
14943 return Imm
.isPosZero();
14947 // For vector shift operation op, fold
14948 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
14949 static SDValue
stripModuloOnShift(const TargetLowering
&TLI
, SDNode
*N
,
14950 SelectionDAG
&DAG
) {
14951 SDValue N0
= N
->getOperand(0);
14952 SDValue N1
= N
->getOperand(1);
14953 EVT VT
= N0
.getValueType();
14954 unsigned OpSizeInBits
= VT
.getScalarSizeInBits();
14955 unsigned Opcode
= N
->getOpcode();
14956 unsigned TargetOpcode
;
14960 llvm_unreachable("Unexpected shift operation");
14962 TargetOpcode
= PPCISD::SHL
;
14965 TargetOpcode
= PPCISD::SRL
;
14968 TargetOpcode
= PPCISD::SRA
;
14972 if (VT
.isVector() && TLI
.isOperationLegal(Opcode
, VT
) &&
14973 N1
->getOpcode() == ISD::AND
)
14974 if (ConstantSDNode
*Mask
= isConstOrConstSplat(N1
->getOperand(1)))
14975 if (Mask
->getZExtValue() == OpSizeInBits
- 1)
14976 return DAG
.getNode(TargetOpcode
, SDLoc(N
), VT
, N0
, N1
->getOperand(0));
14981 SDValue
PPCTargetLowering::combineSHL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
14982 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
14985 SDValue N0
= N
->getOperand(0);
14986 ConstantSDNode
*CN1
= dyn_cast
<ConstantSDNode
>(N
->getOperand(1));
14987 if (!Subtarget
.isISA3_0() ||
14988 N0
.getOpcode() != ISD::SIGN_EXTEND
||
14989 N0
.getOperand(0).getValueType() != MVT::i32
||
14990 CN1
== nullptr || N
->getValueType(0) != MVT::i64
)
14993 // We can't save an operation here if the value is already extended, and
14994 // the existing shift is easier to combine.
14995 SDValue ExtsSrc
= N0
.getOperand(0);
14996 if (ExtsSrc
.getOpcode() == ISD::TRUNCATE
&&
14997 ExtsSrc
.getOperand(0).getOpcode() == ISD::AssertSext
)
15001 SDValue ShiftBy
= SDValue(CN1
, 0);
15002 // We want the shift amount to be i32 on the extswli, but the shift could
15004 if (ShiftBy
.getValueType() == MVT::i64
)
15005 ShiftBy
= DCI
.DAG
.getConstant(CN1
->getZExtValue(), DL
, MVT::i32
);
15007 return DCI
.DAG
.getNode(PPCISD::EXTSWSLI
, DL
, MVT::i64
, N0
->getOperand(0),
15011 SDValue
PPCTargetLowering::combineSRA(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15012 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
15018 SDValue
PPCTargetLowering::combineSRL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15019 if (auto Value
= stripModuloOnShift(*this, N
, DCI
.DAG
))
15025 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
15026 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
15027 // When C is zero, the expression (addi Z, -C) can be simplified to Z
15028 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
//
// Parameters:
//   N         - the ADD node to examine (operands 0 and 1 are read).
//   DAG       - used to create the replacement nodes.
//   Subtarget - only isPPC64() is queried in the visible code.
// NOTE(review): the statements guarded by several `if`s below (presumably
// early returns), the `case` labels of the switch and the declaration of DL
// are not visible in this view - confirm.
15029 static SDValue
combineADDToADDZE(SDNode
*N
, SelectionDAG
&DAG
,
15030 const PPCSubtarget
&Subtarget
) {
15031 if (!Subtarget
.isPPC64())
15034 SDValue LHS
= N
->getOperand(0);
15035 SDValue RHS
= N
->getOperand(1);
// Predicate on one ADD operand. The visible checks look for a single-use i64
// ZERO_EXTEND of a single-use SETCC whose operand 0 is i64 and whose operand 1
// is a constant C such that -C fits in 16 signed bits.
15037 auto isZextOfCompareWithConstant
= [](SDValue Op
) {
15038 if (Op
.getOpcode() != ISD::ZERO_EXTEND
|| !Op
.hasOneUse() ||
15039 Op
.getValueType() != MVT::i64
)
15042 SDValue Cmp
= Op
.getOperand(0);
15043 if (Cmp
.getOpcode() != ISD::SETCC
|| !Cmp
.hasOneUse() ||
15044 Cmp
.getOperand(0).getValueType() != MVT::i64
)
15047 if (auto *Constant
= dyn_cast
<ConstantSDNode
>(Cmp
.getOperand(1))) {
15048 int64_t NegConstant
= 0 - Constant
->getSExtValue();
15049 // Due to the limitations of the addi instruction,
15050 // -C is required to be in [-32768, 32767].
15051 return isInt
<16>(NegConstant
);
// Evaluate the predicate on both operands of the ADD.
15057 bool LHSHasPattern
= isZextOfCompareWithConstant(LHS
);
15058 bool RHSHasPattern
= isZextOfCompareWithConstant(RHS
);
15060 // If there is a pattern, canonicalize a zext operand to the RHS.
15061 if (LHSHasPattern
&& !RHSHasPattern
)
15062 std::swap(LHS
, RHS
);
15063 else if (!LHSHasPattern
&& !RHSHasPattern
)
// From here on RHS is the zext; LHS plays the role of X in the header comment.
// VTs is the (i64, Glue) result list used for the final ADDE node.
15067 SDVTList VTs
= DAG
.getVTList(MVT::i64
, MVT::Glue
);
15068 SDValue Cmp
= RHS
.getOperand(0);
15069 SDValue Z
= Cmp
.getOperand(0);
15070 auto *Constant
= dyn_cast
<ConstantSDNode
>(Cmp
.getOperand(1));
15072 assert(Constant
&& "Constant Should not be a null pointer.");
// NegConstant is -C, the value added to Z before the carry-producing node.
15073 int64_t NegConstant
= 0 - Constant
->getSExtValue();
// Dispatch on the SETCC condition code (operand 2 of the compare).
// Going by the comments, the first arm handles setne and the second sete.
15075 switch(cast
<CondCodeSDNode
>(Cmp
.getOperand(2))->get()) {
15079 // --> addze X, (addic Z, -1).carry
15081 // add X, (zext(setne Z, C))--
15082 // \ when -32768 <= -C <= 32767 && C != 0
15083 // --> addze X, (addic (addi Z, -C), -1).carry
// Add = Z + (-C). It is created unconditionally but only used when -C != 0.
15084 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, Z
,
15085 DAG
.getConstant(NegConstant
, DL
, MVT::i64
));
15086 SDValue AddOrZ
= NegConstant
!= 0 ? Add
: Z
;
// ADDC of AddOrZ and -1 (all ones); its second result is typed MVT::Glue.
15087 SDValue Addc
= DAG
.getNode(ISD::ADDC
, DL
, DAG
.getVTList(MVT::i64
, MVT::Glue
),
15088 AddOrZ
, DAG
.getConstant(-1ULL, DL
, MVT::i64
));
// ADDE of LHS and 0, taking result 1 of Addc as its third operand.
15089 return DAG
.getNode(ISD::ADDE
, DL
, VTs
, LHS
, DAG
.getConstant(0, DL
, MVT::i64
),
15090 SDValue(Addc
.getNode(), 1));
15094 // --> addze X, (subfic Z, 0).carry
15096 // add X, (zext(sete Z, C))--
15097 // \ when -32768 <= -C <= 32767 && C != 0
15098 // --> addze X, (subfic (addi Z, -C), 0).carry
// Same shape as the arm above, with SUBC (0 - AddOrZ) in place of ADDC.
15099 SDValue Add
= DAG
.getNode(ISD::ADD
, DL
, MVT::i64
, Z
,
15100 DAG
.getConstant(NegConstant
, DL
, MVT::i64
));
15101 SDValue AddOrZ
= NegConstant
!= 0 ? Add
: Z
;
15102 SDValue Subc
= DAG
.getNode(ISD::SUBC
, DL
, DAG
.getVTList(MVT::i64
, MVT::Glue
),
15103 DAG
.getConstant(0, DL
, MVT::i64
), AddOrZ
);
// ADDE of LHS and 0, taking result 1 of Subc as its third operand.
15104 return DAG
.getNode(ISD::ADDE
, DL
, VTs
, LHS
, DAG
.getConstant(0, DL
, MVT::i64
),
15105 SDValue(Subc
.getNode(), 1));
// DAG combine hook for ADD nodes. The only step visible here hands N, the DAG
// and the subtarget to combineADDToADDZE() and tests whether it produced a
// value.
// NOTE(review): the statement guarded by the `if` and the rest of the body are
// not visible in this view - confirm.
15112 SDValue
PPCTargetLowering::combineADD(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15113 if (auto Value
= combineADDToADDZE(N
, DCI
.DAG
, Subtarget
))
15119 // Detect TRUNCATE operations on bitcasts of float128 values.
15120 // What we are looking for here is the situation where we extract a subset
15121 // of bits from a 128 bit float.
15122 // This can be of two forms:
15123 // 1) BITCAST of f128 feeding TRUNCATE
15124 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
15125 // The reason this is required is because we do not have a legal i128 type
15126 // and so we want to prevent having to store the f128 and then reload part
// of it.
//
// In the visible code the match ends in an EXTRACT_VECTOR_ELT of one i64
// element from the f128 value bitcast to v2i64.
// NOTE(review): the statements guarded by some `if`s below (presumably early
// returns) and the declaration of dl are not visible in this view - confirm.
15128 SDValue
PPCTargetLowering::combineTRUNCATE(SDNode
*N
,
15129 DAGCombinerInfo
&DCI
) const {
15130 // If we are using CRBits then try that first.
15131 if (Subtarget
.useCRBits()) {
15132 // Check if CRBits did anything and return that if it did.
15133 if (SDValue CRTruncValue
= DAGCombineTruncBoolExt(N
, DCI
))
15134 return CRTruncValue
;
15138 SDValue Op0
= N
->getOperand(0);
15140 // Looking for a truncate of i128 to i64.
15141 if (Op0
.getValueType() != MVT::i128
|| N
->getValueType(0) != MVT::i64
)
// Index of the v2i64 element to extract: 1 when the data layout is
// big-endian, 0 otherwise.
15144 int EltToExtract
= DCI
.DAG
.getDataLayout().isBigEndian() ? 1 : 0;
15146 // SRL feeding TRUNCATE.
15147 if (Op0
.getOpcode() == ISD::SRL
) {
15148 ConstantSDNode
*ConstNode
= dyn_cast
<ConstantSDNode
>(Op0
.getOperand(1));
15149 // The right shift has to be by 64 bits.
15150 if (!ConstNode
|| ConstNode
->getZExtValue() != 64)
15153 // Switch the element number to extract.
15154 EltToExtract
= EltToExtract
? 0 : 1;
15155 // Update Op0 past the SRL.
15156 Op0
= Op0
.getOperand(0);
15159 // BITCAST feeding a TRUNCATE possibly via SRL.
15160 if (Op0
.getOpcode() == ISD::BITCAST
&&
15161 Op0
.getValueType() == MVT::i128
&&
15162 Op0
.getOperand(0).getValueType() == MVT::f128
) {
// Reinterpret the f128 source as v2i64 and extract the selected element; the
// index is passed as an i32 target constant.
15163 SDValue Bitcast
= DCI
.DAG
.getBitcast(MVT::v2i64
, Op0
.getOperand(0));
15164 return DCI
.DAG
.getNode(
15165 ISD::EXTRACT_VECTOR_ELT
, dl
, MVT::i64
, Bitcast
,
15166 DCI
.DAG
.getTargetConstant(EltToExtract
, dl
, MVT::i32
));
// DAG combine hook for MUL nodes whose operand 1 is a constant or constant
// splat (as reported by isConstOrConstSplat). When the absolute value of that
// constant is 2^N + 1 or 2^N - 1, the visible code builds SHL/ADD/SUB nodes in
// place of the multiply, subject to the IsProfitable check.
// NOTE(review): the statements guarded by several `if`s below (presumably
// early returns) and the declarations of DL and Op1 are not visible in this
// view - confirm.
15171 SDValue
PPCTargetLowering::combineMUL(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15172 SelectionDAG
&DAG
= DCI
.DAG
;
15174 ConstantSDNode
*ConstOpOrElement
= isConstOrConstSplat(N
->getOperand(1));
15175 if (!ConstOpOrElement
)
15178 // An imul is usually smaller than the alternative sequence for legal type.
15179 if (DAG
.getMachineFunction().getFunction().hasMinSize() &&
15180 isOperationLegal(ISD::MUL
, N
->getValueType(0)))
// IsProfitable(IsNeg, IsAddOne, VT): switches on the subtarget's Darwin
// directive. The return visible after the DIR_PWR8/DIR_PWR9 labels accepts
// the negated (2^N + 1) form only for vector types and accepts every other
// form.
15183 auto IsProfitable
= [this](bool IsNeg
, bool IsAddOne
, EVT VT
) -> bool {
15184 switch (this->Subtarget
.getDarwinDirective()) {
15186 // TODO: enhance the condition for subtarget before pwr8
15188 case PPC::DIR_PWR8
:
15189 // type mul add shl
15193 case PPC::DIR_PWR9
:
15194 // type mul add shl
15198 // The cycle RATIO of related operations are shown in the table above.
15199 // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
15200 // scalar and vector type. For 2 instrs patterns, add/sub + shl
15201 // are 4, it is always profitable; but for 3 instrs patterns
15202 // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
15203 // So we should only do it for vector type.
15204 return IsAddOne
&& IsNeg
? VT
.isVector() : true;
15208 EVT VT
= N
->getValueType(0);
// Split the multiplier into its sign and absolute value.
15211 const APInt
&MulAmt
= ConstOpOrElement
->getAPIntValue();
15212 bool IsNeg
= MulAmt
.isNegative();
15213 APInt MulAmtAbs
= MulAmt
.abs();
15215 if ((MulAmtAbs
- 1).isPowerOf2()) {
15216 // (mul x, 2^N + 1) => (add (shl x, N), x)
15217 // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
15219 if (!IsProfitable(IsNeg
, true, VT
))
15222 SDValue Op0
= N
->getOperand(0);
// (shl x, N) with N = log2(|MulAmt| - 1).
// NOTE(review): presumably this expression initializes Op1 - confirm.
15224 DAG
.getNode(ISD::SHL
, DL
, VT
, N
->getOperand(0),
15225 DAG
.getConstant((MulAmtAbs
- 1).logBase2(), DL
, VT
));
15226 SDValue Res
= DAG
.getNode(ISD::ADD
, DL
, VT
, Op0
, Op1
);
// Negated sum, built as (sub 0, Res); matches the -(2^N + 1) form above.
15231 return DAG
.getNode(ISD::SUB
, DL
, VT
, DAG
.getConstant(0, DL
, VT
), Res
);
15232 } else if ((MulAmtAbs
+ 1).isPowerOf2()) {
15233 // (mul x, 2^N - 1) => (sub (shl x, N), x)
15234 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
15236 if (!IsProfitable(IsNeg
, false, VT
))
15239 SDValue Op0
= N
->getOperand(0);
// (shl x, N) with N = log2(|MulAmt| + 1).
// NOTE(review): presumably this expression initializes Op1 - confirm.
15241 DAG
.getNode(ISD::SHL
, DL
, VT
, N
->getOperand(0),
15242 DAG
.getConstant((MulAmtAbs
+ 1).logBase2(), DL
, VT
));
// Op1 - Op0.
15245 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op1
, Op0
);
// Op0 - Op1.
15247 return DAG
.getNode(ISD::SUB
, DL
, VT
, Op0
, Op1
);
// Decides whether the call instruction CI may be emitted as a tail call.
// The visible code runs a chain of checks (ABI, CI's tail-call marker, the
// caller's "disable-tail-calls" attribute, sibling-call options, callee
// identity and variadic-ness, calling conventions) and finally asks the
// target machine whether the callee can be assumed DSO-local.
// NOTE(review): the statements guarded by the `if`s below (presumably early
// returns) are not visible in this view - confirm.
15254 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst
*CI
) const {
15255 // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
15256 if (!Subtarget
.is64BitELFABI())
15259 // If not a tail call then no need to proceed.
15260 if (!CI
->isTailCall())
15263 // If tail calls are disabled for the caller then we are done.
// Caller is the function that contains CI (parent of CI's parent).
15264 const Function
*Caller
= CI
->getParent()->getParent();
15265 auto Attr
= Caller
->getFnAttribute("disable-tail-calls");
15266 if (Attr
.getValueAsString() == "true")
15269 // If sibling calls have been disabled and tail-calls aren't guaranteed
15270 // there is no reason to duplicate.
15271 auto &TM
= getTargetMachine();
15272 if (!TM
.Options
.GuaranteedTailCallOpt
&& DisableSCO
)
15275 // Can't tail call a function called indirectly, or if it has variadic args.
// A null result from getCalledFunction() is treated as the indirect case.
15276 const Function
*Callee
= CI
->getCalledFunction();
15277 if (!Callee
|| Callee
->isVarArg())
15280 // Make sure the callee and caller calling conventions are eligible for tco.
15281 if (!areCallingConvEligibleForTCO_64SVR4(Caller
->getCallingConv(),
15282 CI
->getCallingConv()))
15285 // If the function is local then we have a good chance at tail-calling it
15286 return getTargetMachine().shouldAssumeDSOLocal(*Caller
->getParent(), Callee
);
// Reports, for value type VT, whether FP logic is bit-preserving on this
// subtarget. Visible checks: VSX availability, then f128 together with P9
// vector support; the final return is true for f32, f64, v4f32 and v2f64.
// NOTE(review): the statements guarded by the two `if`s are not visible in
// this view - confirm what they return.
15289 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT
) const {
15290 if (!Subtarget
.hasVSX())
15292 if (Subtarget
.hasP9Vector() && VT
== MVT::f128
)
15294 return VT
== MVT::f32
|| VT
== MVT::f64
||
15295 VT
== MVT::v4f32
|| VT
== MVT::v2f64
;
// Inspects operand 1 of AndI as the mask. For a ConstantInt mask no wider
// than 64 bits, the visible return is true when the value fits in 16 unsigned
// bits, or when its low 16 bits are clear and the value shifted right by 16
// fits in 16 unsigned bits (the andi. / andis. forms named below).
// NOTE(review): the statement guarded by the width check and the result for
// non-constant masks are not visible in this view - confirm.
15298 bool PPCTargetLowering::
15299 isMaskAndCmp0FoldingBeneficial(const Instruction
&AndI
) const {
15300 const Value
*Mask
= AndI
.getOperand(1);
15301 // If the mask is suitable for andi. or andis. we should sink the and.
15302 if (const ConstantInt
*CI
= dyn_cast
<ConstantInt
>(Mask
)) {
15303 // Can't handle constants wider than 64-bits.
15304 if (CI
->getBitWidth() > 64)
// NOTE(review): ConstVal is signed but initialized from getZExtValue(), so
// `>> 16` below is a signed shift when bit 63 is set; consider whether
// uint64_t was intended.
15306 int64_t ConstVal
= CI
->getZExtValue();
15307 return isUInt
<16>(ConstVal
) ||
15308 (isUInt
<16>(ConstVal
>> 16) && !(ConstVal
& 0xFFFF));
15311 // For non-constant masks, we can always use the record-form and.
15315 // Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
15316 // Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
15317 // Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
15318 // Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
15319 // Transform (abs (sub a, b)) to (vabsd a b 1) if a & b of type v4i32
//
// The last operand of the PPCISD::VABSD node is an i32 target constant: 0 for
// the zero-extended forms, 1 for the plain v4i32 form.
// NOTE(review): the statement guarded by the type check below (presumably an
// early return) and the declaration of dl are not visible in this view -
// confirm.
15320 SDValue
PPCTargetLowering::combineABS(SDNode
*N
, DAGCombinerInfo
&DCI
) const {
15321 assert((N
->getOpcode() == ISD::ABS
) && "Need ABS node here");
15322 assert(Subtarget
.hasP9Altivec() &&
15323 "Only combine this when P9 altivec supported!");
// Only v4i32, v8i16 and v16i8 results are considered.
15324 EVT VT
= N
->getValueType(0);
15325 if (VT
!= MVT::v4i32
&& VT
!= MVT::v8i16
&& VT
!= MVT::v16i8
)
15328 SelectionDAG
&DAG
= DCI
.DAG
;
15330 if (N
->getOperand(0).getOpcode() == ISD::SUB
) {
15331 // Even for signed integers, if it's known to be positive (as signed
15332 // integer) due to zero-extended inputs.
// Opcodes of the two operands of the SUB.
15333 unsigned SubOpcd0
= N
->getOperand(0)->getOperand(0).getOpcode();
15334 unsigned SubOpcd1
= N
->getOperand(0)->getOperand(1).getOpcode();
// Both SUB operands must be ZERO_EXTEND or ZERO_EXTEND_VECTOR_INREG.
15335 if ((SubOpcd0
== ISD::ZERO_EXTEND
||
15336 SubOpcd0
== ISD::ZERO_EXTEND_VECTOR_INREG
) &&
15337 (SubOpcd1
== ISD::ZERO_EXTEND
||
15338 SubOpcd1
== ISD::ZERO_EXTEND_VECTOR_INREG
)) {
// VABSD takes the SUB's own operands, with flag 0.
15339 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(0).getValueType(),
15340 N
->getOperand(0)->getOperand(0),
15341 N
->getOperand(0)->getOperand(1),
15342 DAG
.getTargetConstant(0, dl
, MVT::i32
));
15345 // For type v4i32, it can be optimized with xvnegsp + vabsduw
// Applied only when the SUB has a single use.
15346 if (N
->getOperand(0).getValueType() == MVT::v4i32
&&
15347 N
->getOperand(0).hasOneUse()) {
15348 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(0).getValueType(),
15349 N
->getOperand(0)->getOperand(0),
15350 N
->getOperand(0)->getOperand(1),
15351 DAG
.getTargetConstant(1, dl
, MVT::i32
));
15358 // For type v4i32/v8i16/v16i8, transform
15359 // from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
15360 // from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
15361 // from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
15362 // from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
15363 SDValue
PPCTargetLowering::combineVSelect(SDNode
*N
,
15364 DAGCombinerInfo
&DCI
) const {
15365 assert((N
->getOpcode() == ISD::VSELECT
) && "Need VSELECT node here");
15366 assert(Subtarget
.hasP9Altivec() &&
15367 "Only combine this when P9 altivec supported!");
15369 SelectionDAG
&DAG
= DCI
.DAG
;
15371 SDValue Cond
= N
->getOperand(0);
15372 SDValue TrueOpnd
= N
->getOperand(1);
15373 SDValue FalseOpnd
= N
->getOperand(2);
15374 EVT VT
= N
->getOperand(1).getValueType();
15376 if (Cond
.getOpcode() != ISD::SETCC
|| TrueOpnd
.getOpcode() != ISD::SUB
||
15377 FalseOpnd
.getOpcode() != ISD::SUB
)
15380 // ABSD only available for type v4i32/v8i16/v16i8
15381 if (VT
!= MVT::v4i32
&& VT
!= MVT::v8i16
&& VT
!= MVT::v16i8
)
15384 // At least to save one more dependent computation
15385 if (!(Cond
.hasOneUse() || TrueOpnd
.hasOneUse() || FalseOpnd
.hasOneUse()))
15388 ISD::CondCode CC
= cast
<CondCodeSDNode
>(Cond
.getOperand(2))->get();
15390 // Can only handle unsigned comparison here
15399 std::swap(TrueOpnd
, FalseOpnd
);
15403 SDValue CmpOpnd1
= Cond
.getOperand(0);
15404 SDValue CmpOpnd2
= Cond
.getOperand(1);
15406 // SETCC CmpOpnd1 CmpOpnd2 cond
15407 // TrueOpnd = CmpOpnd1 - CmpOpnd2
15408 // FalseOpnd = CmpOpnd2 - CmpOpnd1
15409 if (TrueOpnd
.getOperand(0) == CmpOpnd1
&&
15410 TrueOpnd
.getOperand(1) == CmpOpnd2
&&
15411 FalseOpnd
.getOperand(0) == CmpOpnd2
&&
15412 FalseOpnd
.getOperand(1) == CmpOpnd1
) {
15413 return DAG
.getNode(PPCISD::VABSD
, dl
, N
->getOperand(1).getValueType(),
15414 CmpOpnd1
, CmpOpnd2
,
15415 DAG
.getTargetConstant(0, dl
, MVT::i32
));